diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 624a197299464f1cb2740697b04e289217ac636b..de46d4c8278f952d375fc303683b9be6a6b44373 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -45,8 +45,6 @@ stages: fi - export CTEST_OUTPUT_ON_FAILURE=1 - export CTEST_PARALLEL_LEVEL=4 - # enforce (more or less) warning-free builds - - export CXXFLAGS="-Werror -Wno-error=deprecated -Wno-error=deprecated-declarations -Wno-error=uninitialized -Wno-error=vla" - mkdir -p "./builddir/$CI_JOB_NAME" - pushd "./builddir/$CI_JOB_NAME" - cmake ../.. @@ -64,6 +62,7 @@ stages: -DWITH_EXAMPLES=${WITH_EXAMPLES} -DWITH_TOOLS=${WITH_TOOLS} -DWITH_PYTHON=${WITH_PYTHON} + -DWITH_CI_FLAGS=yes # "install" implies the "all" target - ninja ${NINJAFLAGS} install - if [[ ${WITH_TESTS} == "yes" ]]; then diff --git a/CMakeLists.txt b/CMakeLists.txt index 68252ba6a74c45adc73dbd4e18b0afb5c81e5a67..ea0d8a30b6144e74950d2b1d9fde302a50e53e28 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -96,6 +96,11 @@ set( CMAKE_SHARED_LINKER_FLAGS "" ) set( CMAKE_SHARED_LINKER_FLAGS_DEBUG "-rdynamic" ) set( CMAKE_SHARED_LINKER_FLAGS_RELEASE "" ) +if( ${WITH_CI_FLAGS} ) + # enforce (more or less) warning-free builds + set( CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Werror -Wno-error=deprecated -Wno-error=deprecated-declarations -Wno-error=uninitialized -Wno-error=vla" ) +endif() + # set additional Debug/Release options using generator expressions # (that way we can exclude some options for specific targets, see https://stackoverflow.com/a/59734798 for details) add_compile_options( diff --git a/build b/build index 67492f02c3c5c90647dceb9c607e9e0f0891d108..ee74fa87bfb98b0db0bd87976afcfe8ad9f741a8 100755 --- a/build +++ b/build @@ -31,6 +31,7 @@ WITH_EXAMPLES="yes" WITH_PYTHON="yes" WITH_TOOLS="yes" WITH_BENCHMARKS="yes" +WITH_CI_FLAGS="no" for option in "$@" do @@ -62,6 +63,7 @@ do --with-tools=* ) WITH_TOOLS="${option#*=}" ;; --with-benchmarks=* ) WITH_BENCHMARKS="${option#*=}" ;; --with-python=* ) WITH_PYTHON="${option#*=}" ;; + --with-ci-flags=* ) WITH_CI_FLAGS="${option#*=}" ;; * ) echo "Unknown option ${option}. Use --help for more information." 
exit 1 ;; @@ -142,6 +144,7 @@ cmake_command=( -DWITH_TOOLS=${WITH_TOOLS} -DWITH_PYTHON=${WITH_PYTHON} -DWITH_BENCHMARKS=${WITH_BENCHMARKS} + -DWITH_CI_FLAGS=${WITH_CI_FLAGS} -DDCMTK_DIR=${DCMTK_DIR} ) diff --git a/src/Benchmarks/BLAS/spmv.h b/src/Benchmarks/BLAS/spmv.h index d515d52d73d513d87b86d4b743d8b0e27b20e0ca..6685b9f766237dbcbe2d2a017eb1d8dac9a87135 100644 --- a/src/Benchmarks/BLAS/spmv.h +++ b/src/Benchmarks/BLAS/spmv.h @@ -15,10 +15,10 @@ #include "../Benchmarks.h" #include <TNL/Pointers/DevicePointer.h> -#include <TNL/Matrices/CSR.h> -#include <TNL/Matrices/Ellpack.h> -#include <TNL/Matrices/SlicedEllpack.h> -#include <TNL/Matrices/ChunkedEllpack.h> +#include <TNL/Matrices/Legacy/CSR.h> +#include <TNL/Matrices/Legacy/Ellpack.h> +#include <TNL/Matrices/Legacy/SlicedEllpack.h> +#include <TNL/Matrices/Legacy/ChunkedEllpack.h> namespace TNL { namespace Benchmarks { diff --git a/src/Benchmarks/DistSpMV/tnl-benchmark-distributed-spmv.h b/src/Benchmarks/DistSpMV/tnl-benchmark-distributed-spmv.h index aa4b29424d2b93b323017e5501231a57874ccfa4..b90b11088ef8f73511adb2ba5c58448e93e2bcf8 100644 --- a/src/Benchmarks/DistSpMV/tnl-benchmark-distributed-spmv.h +++ b/src/Benchmarks/DistSpMV/tnl-benchmark-distributed-spmv.h @@ -30,7 +30,7 @@ #include "../Benchmarks.h" #include "ordering.h" -#include <TNL/Matrices/SlicedEllpack.h> +#include <TNL/Matrices/Legacy/SlicedEllpack.h> using namespace TNL; using namespace TNL::Benchmarks; diff --git a/src/Benchmarks/LinearSolvers/tnl-benchmark-linear-solvers.h b/src/Benchmarks/LinearSolvers/tnl-benchmark-linear-solvers.h index 4aabf39cd4bae98bc411fcc95feef56672b039ca..ea39d80b7b8011f53e7187e53a62e8446bdb8b82 100644 --- a/src/Benchmarks/LinearSolvers/tnl-benchmark-linear-solvers.h +++ b/src/Benchmarks/LinearSolvers/tnl-benchmark-linear-solvers.h @@ -55,7 +55,7 @@ #define HAVE_CUSOLVER #endif -#include <TNL/Matrices/SlicedEllpack.h> +#include <TNL/Matrices/Legacy/SlicedEllpack.h> using namespace TNL; using namespace TNL::Benchmarks; @@ -365,8 +365,7 @@ struct LinearSolversBenchmark // load the matrix if( ends_with( file_matrix, ".mtx" ) ) { Matrices::MatrixReader< MatrixType > reader; - if( ! 
reader.readMtxFile( file_matrix, *matrixPointer ) ) - return false; + reader.readMtxFile( file_matrix, *matrixPointer ); } else { matrixPointer->load( file_matrix ); diff --git a/src/Benchmarks/ODESolvers/tnl-benchmark-ode-solvers.h b/src/Benchmarks/ODESolvers/tnl-benchmark-ode-solvers.h index d29b680bc8835a5615073fbf61e3cc13a74dfca2..dad2cdd8dc309b38844c1c5eb27232f75d6092dc 100644 --- a/src/Benchmarks/ODESolvers/tnl-benchmark-ode-solvers.h +++ b/src/Benchmarks/ODESolvers/tnl-benchmark-ode-solvers.h @@ -36,8 +36,6 @@ #include "Euler.h" #include "Merson.h" -#include <TNL/Matrices/SlicedEllpack.h> - using namespace TNL; using namespace TNL::Benchmarks; using namespace TNL::Pointers; diff --git a/src/Benchmarks/SpMV/spmv.h b/src/Benchmarks/SpMV/spmv.h index 6bfee5ffef7702219493d124d72da5d15fc8c54b..02a26854dc06a5ad0de8230a6f351d24a2526975 100644 --- a/src/Benchmarks/SpMV/spmv.h +++ b/src/Benchmarks/SpMV/spmv.h @@ -17,14 +17,21 @@ #include "../Benchmarks.h" #include <TNL/Pointers/DevicePointer.h> -#include <TNL/Matrices/CSR.h> -#include <TNL/Matrices/Ellpack.h> -#include <TNL/Matrices/SlicedEllpack.h> -#include <TNL/Matrices/ChunkedEllpack.h> -#include <TNL/Matrices/AdEllpack.h> -#include <TNL/Matrices/BiEllpack.h> +#include <TNL/Matrices/Legacy/CSR.h> +#include <TNL/Matrices/Legacy/Ellpack.h> +#include <TNL/Matrices/Legacy/SlicedEllpack.h> +#include <TNL/Matrices/Legacy/ChunkedEllpack.h> +#include <TNL/Matrices/Legacy/AdEllpack.h> +#include <TNL/Matrices/Legacy/BiEllpack.h> #include <TNL/Matrices/MatrixReader.h> +#include <TNL/Matrices/MatrixInfo.h> + +#include <TNL/Matrices/SparseMatrix.h> +#include <TNL/Matrices/MatrixType.h> +#include <TNL/Containers/Segments/CSR.h> +#include <TNL/Containers/Segments/Ellpack.h> +#include <TNL/Containers/Segments/SlicedEllpack.h> using namespace TNL::Matrices; #include "cusparseCSRMatrix.h" @@ -36,6 +43,22 @@ namespace Benchmarks { template< typename Real, typename Device, typename Index > using SlicedEllpackAlias = Matrices::SlicedEllpack< Real, Device, Index >; +// Segments based sparse matrix aliases +template< typename Real, typename Device, typename Index > +using SparseMatrix_CSR = Matrices::SparseMatrix< Real, Device, Index, Matrices::GeneralMatrix, Containers::Segments::CSR >; + +template< typename Device, typename Index, typename IndexAllocator > +using EllpackSegments = Containers::Segments::Ellpack< Device, Index, IndexAllocator >; + +template< typename Real, typename Device, typename Index > +using SparseMatrix_Ellpack = Matrices::SparseMatrix< Real, Device, Index, Matrices::GeneralMatrix, EllpackSegments >; + +template< typename Device, typename Index, typename IndexAllocator > +using SlicedEllpackSegments = Containers::Segments::SlicedEllpack< Device, Index, IndexAllocator >; + +template< typename Real, typename Device, typename Index > +using SparseMatrix_SlicedEllpack = Matrices::SparseMatrix< Real, Device, Index, Matrices::GeneralMatrix, SlicedEllpackSegments >; + // Get the name (with extension) of input matrix file std::string getMatrixFileName( const String& InputFileName ) { @@ -85,7 +108,7 @@ void printMatrixInfo( const Matrix& matrix, template< typename Real, template< typename, typename, typename > class Matrix, template< typename, typename, typename, typename > class Vector = Containers::Vector > -bool +void benchmarkSpMV( Benchmark& benchmark, const String& inputFileName, bool verboseMR ) @@ -98,19 +121,7 @@ benchmarkSpMV( Benchmark& benchmark, CSR_DeviceMatrix CSRdeviceMatrix; // Read the matrix for CSR, to set up cuSPARSE - try - 
{ - if( ! MatrixReader< CSR_HostMatrix >::readMtxFile( inputFileName, CSRhostMatrix, verboseMR ) ) - { - throw std::bad_alloc(); - return false; - } - } - catch( std::bad_alloc& e ) - { - e.what(); - return false; - } + MatrixReader< CSR_HostMatrix >::readMtxFile( inputFileName, CSRhostMatrix, verboseMR ); #ifdef HAVE_CUDA // cuSPARSE handle setup @@ -140,19 +151,7 @@ benchmarkSpMV( Benchmark& benchmark, CudaVector deviceVector, deviceVector2; // Load the format - try - { - if( ! MatrixReader< HostMatrix >::readMtxFile( inputFileName, hostMatrix, verboseMR ) ) - { - throw std::bad_alloc(); - return false; - } - } - catch( std::bad_alloc& e ) - { - e.what(); - return false; - } + MatrixReader< HostMatrix >::readMtxFile( inputFileName, hostMatrix, verboseMR ); // Setup MetaData here (not in tnl-benchmark-spmv.h, as done in Benchmarks/BLAS), @@ -162,7 +161,7 @@ benchmarkSpMV( Benchmark& benchmark, { "non-zeros", convertToString( hostMatrix.getNumberOfNonzeroMatrixElements() ) }, { "rows", convertToString( hostMatrix.getRows() ) }, { "columns", convertToString( hostMatrix.getColumns() ) }, - { "matrix format", convertToString( getMatrixFormat( hostMatrix ) ) } + { "matrix format", MatrixInfo< HostMatrix >::getFormat() } //convertToString( getType( hostMatrix ) ) } } )); hostVector.setSize( hostMatrix.getColumns() ); @@ -244,7 +243,7 @@ benchmarkSpMV( Benchmark& benchmark, resultcuSPARSEDeviceVector2 = deviceVector2; - // Difference between GPU (curent format) and GPU-cuSPARSE results + // Difference between GPU (current format) and GPU-cuSPARSE results //Real cuSparseDifferenceAbsMax = resultDeviceVector2.differenceAbsMax( resultcuSPARSEDeviceVector2 ); Real cuSparseDifferenceAbsMax = max( abs( resultDeviceVector2 - resultcuSPARSEDeviceVector2 ) ); //Real cuSparseDifferenceLpNorm = resultDeviceVector2.differenceLpNorm( resultcuSPARSEDeviceVector2, 1 ); @@ -279,26 +278,35 @@ benchmarkSpMV( Benchmark& benchmark, #endif std::cout << std::endl; - return true; } template< typename Real = double, typename Index = int > -bool +void benchmarkSpmvSynthetic( Benchmark& benchmark, const String& inputFileName, bool verboseMR ) { - bool result = true; - result |= benchmarkSpMV< Real, Matrices::CSR >( benchmark, inputFileName, verboseMR ); - result |= benchmarkSpMV< Real, Matrices::Ellpack >( benchmark, inputFileName, verboseMR ); - result |= benchmarkSpMV< Real, SlicedEllpackAlias >( benchmark, inputFileName, verboseMR ); - result |= benchmarkSpMV< Real, Matrices::ChunkedEllpack >( benchmark, inputFileName, verboseMR ); + benchmarkSpMV< Real, Matrices::CSR >( benchmark, inputFileName, verboseMR ); + benchmarkSpMV< Real, SparseMatrix_CSR >( benchmark, inputFileName, verboseMR ); + + benchmarkSpMV< Real, Matrices::Ellpack >( benchmark, inputFileName, verboseMR ); + benchmarkSpMV< Real, SparseMatrix_Ellpack >( benchmark, inputFileName, verboseMR ); + + benchmarkSpMV< Real, SlicedEllpackAlias >( benchmark, inputFileName, verboseMR ); + benchmarkSpMV< Real, SparseMatrix_SlicedEllpack >( benchmark, inputFileName, verboseMR ); + benchmarkSpMV< Real, Matrices::ChunkedEllpack >( benchmark, inputFileName, verboseMR ); + benchmarkSpMV< Real, Matrices::BiEllpack >( benchmark, inputFileName, verboseMR ); + + //// + // Segments based sparse matrices + + + // // AdEllpack is broken -// result |= benchmarkSpMV< Real, Matrices::AdEllpack >( benchmark, inputFileName, verboseMR ); - result |= benchmarkSpMV< Real, Matrices::BiEllpack >( benchmark, inputFileName, verboseMR ); - return result; + // benchmarkSpMV< Real, 
Matrices::AdEllpack >( benchmark, inputFileName, verboseMR ); + //benchmarkSpMV< Real, Matrices::BiEllpack >( benchmark, inputFileName, verboseMR ); } } // namespace Benchmarks diff --git a/src/Benchmarks/SpMV/tnl-benchmark-spmv.h b/src/Benchmarks/SpMV/tnl-benchmark-spmv.h index 77c079c4c562408a63182ca910c9ebfc5d68e111..65416f0432085f744ee66a80efb497242ef0db81 100644 --- a/src/Benchmarks/SpMV/tnl-benchmark-spmv.h +++ b/src/Benchmarks/SpMV/tnl-benchmark-spmv.h @@ -96,11 +96,11 @@ main( int argc, char* argv[] ) // * The guide on what parameters to use prints twice. // FIXME: When ./tnl-benchmark-spmv-dbg is called with '--help': // * The guide on what parameter to use print once. - // But then it CRASHES due to segfault: -// The program attempts to get unknown parameter openmp-enabled -// Aborting the program. -// terminate called after throwing an instance of 'int' -// [1] 17156 abort (core dumped) ~/tnl-dev/Debug/bin/./tnl-benchmark-spmv-dbg --help + // But then it CRASHES due to segfault: + // The program attempts to get unknown parameter openmp-enabled + // Aborting the program. + // terminate called after throwing an instance of 'int' + // [1] 17156 abort (core dumped) ~/tnl-dev/Debug/bin/./tnl-benchmark-spmv-dbg --help if( ! parseCommandLine( argc, argv, conf_desc, parameters ) ) { conf_desc.printUsage( argv[ 0 ] ); diff --git a/src/Python/pytnl/tnl/SparseMatrix.cpp b/src/Python/pytnl/tnl/SparseMatrix.cpp index e6584998313fa9e3c1314c6f67b99267815cf0a8..fe3ba5aca7f2dbb96817760620cf3369b3b43140 100644 --- a/src/Python/pytnl/tnl/SparseMatrix.cpp +++ b/src/Python/pytnl/tnl/SparseMatrix.cpp @@ -3,9 +3,9 @@ #include "SparseMatrix.h" -#include <TNL/Matrices/CSR.h> -#include <TNL/Matrices/Ellpack.h> -#include <TNL/Matrices/SlicedEllpack.h> +#include <TNL/Matrices/Legacy/CSR.h> +#include <TNL/Matrices/Legacy/Ellpack.h> +#include <TNL/Matrices/Legacy/SlicedEllpack.h> using CSR_host = TNL::Matrices::CSR< double, TNL::Devices::Host, int >; using CSR_cuda = TNL::Matrices::CSR< double, TNL::Devices::Cuda, int >; diff --git a/src/Python/pytnl/tnl/SparseMatrix.h b/src/Python/pytnl/tnl/SparseMatrix.h index 1a32bd257f52a14f07579abe3671df1978cfc4d2..e4064e1a411364084e2422d1a18f8814a1271f0b 100644 --- a/src/Python/pytnl/tnl/SparseMatrix.h +++ b/src/Python/pytnl/tnl/SparseMatrix.h @@ -5,7 +5,7 @@ namespace py = pybind11; #include <TNL/String.h> #include <TNL/Containers/Vector.h> -#include <TNL/Matrices/CSR.h> +#include <TNL/Matrices/Legacy/CSR.h> template< typename Matrix > struct SpecificExports @@ -51,7 +51,7 @@ void export_Matrix( py::module & m, const char* name ) using VectorType = TNL::Containers::Vector< typename Matrix::RealType, typename Matrix::DeviceType, typename Matrix::IndexType >; - void (Matrix::* _getCompressedRowLengths)(typename Matrix::CompressedRowLengthsVector&) const = &Matrix::getCompressedRowLengths; + void (Matrix::* _getCompressedRowLengths)(typename Matrix::CompressedRowLengthsVectorView) const = &Matrix::getCompressedRowLengths; auto matrix = py::class_< Matrix, TNL::Object >( m, name ) .def(py::init<>()) @@ -72,7 +72,7 @@ void export_Matrix( py::module & m, const char* name ) .def("getCompressedRowLengths", _getCompressedRowLengths) // TODO: export for more types .def("setLike", &Matrix::template setLike< typename Matrix::RealType, typename Matrix::DeviceType, typename Matrix::IndexType >) - .def("getNumberOfMatrixElements", &Matrix::getNumberOfMatrixElements) + .def("getAllocatedElementsCount", &Matrix::getAllocatedElementsCount) .def("getNumberOfNonzeroMatrixElements", 
&Matrix::getNumberOfNonzeroMatrixElements) .def("reset", &Matrix::reset) .def("getRows", &Matrix::getRows) diff --git a/src/TNL/Algorithms/AtomicOperations.h b/src/TNL/Algorithms/AtomicOperations.h new file mode 100644 index 0000000000000000000000000000000000000000..6b5c5b4e013c4a7283e1fe827fe59638dbbd0128 --- /dev/null +++ b/src/TNL/Algorithms/AtomicOperations.h @@ -0,0 +1,94 @@ +/*************************************************************************** + AtomicOperations.h - description + ------------------- + begin : Feb 26, 2020 + copyright : (C) 2020 by Tomas Oberhuber et al. + email : tomas.oberhuber@fjfi.cvut.cz + ***************************************************************************/ + +/* See Copyright Notice in tnl/Copyright */ + +// Implemented by: Tomas Oberhuber, Jakub Klinkovsky + +#pragma once + +#ifdef HAVE_CUDA +#include <cuda.h> +#endif +#include <TNL/Devices/Sequential.h> +#include <TNL/Devices/Host.h> +#include <TNL/Devices/Cuda.h> + +namespace TNL { +namespace Algorithms { + +template< typename Device > +struct AtomicOperations{}; + +template<> +struct AtomicOperations< Devices::Host > +{ + template< typename Value > + static void add( Value& v, const Value& a ) + { +#pragma omp atomic update + v += a; + } +}; + +template<> +struct AtomicOperations< Devices::Cuda > +{ + template< typename Value > + __cuda_callable__ + static void add( Value& v, const Value& a ) + { +#ifdef HAVE_CUDA + atomicAdd( &v, a ); +#endif // HAVE_CUDA + } + +#ifdef HAVE_CUDA + __device__ + static void add( double& v, const double& a ) + { +#if __CUDA_ARCH__ < 600 + unsigned long long int* v_as_ull = ( unsigned long long int* ) &v; + unsigned long long int old = *v_as_ull, assumed; + + do + { + assumed = old; + old = atomicCAS( v_as_ull, + assumed, + __double_as_longlong( a + __longlong_as_double( assumed ) ) ) ; + + // Note: uses integer comparison to avoid hang in case of NaN (since NaN != NaN) + } + while( assumed != old ); +#else // __CUDA_ARCH__ < 600 + atomicAdd( &v, a ); +#endif //__CUDA_ARCH__ < 600 + } +#else // HAVE_CUDA + static void add( double& v, const double& a ){} +#endif // HAVE_CUDA + + __cuda_callable__ + static void add( long int& v, const long int& a ) + { +#ifdef HAVE_CUDA + TNL_ASSERT_TRUE( false, "Atomic add for long int is not supported on CUDA." ); +#endif // HAVE_CUDA + } + + __cuda_callable__ + static void add( short int& v, const short int& a ) + { +#ifdef HAVE_CUDA + TNL_ASSERT_TRUE( false, "Atomic add for short int is not supported on CUDA." 
); +#endif // HAVE_CUDA + } +}; +} //namespace Algorithms +} //namespace TNL diff --git a/src/TNL/Algorithms/CudaScanKernel.h b/src/TNL/Algorithms/CudaScanKernel.h index 79a2019594922eee640672edb12d8ef6e9132dd0..97912b2343907e504db133ae545f5f420f18f0e3 100644 --- a/src/TNL/Algorithms/CudaScanKernel.h +++ b/src/TNL/Algorithms/CudaScanKernel.h @@ -277,7 +277,7 @@ struct CudaScanKernelLauncher elementsInBlock, &deviceInput[ gridOffset ], &deviceOutput[ gridOffset ], - &blockSums[ gridIdx * maxGridSize() ] ); + &blockSums.getData()[ gridIdx * maxGridSize() ] ); } // synchronize the null-stream after all grids diff --git a/src/TNL/Algorithms/Scan.hpp b/src/TNL/Algorithms/Scan.hpp index 7b6d31ece513144c5b0cec9947a232b940fb5e30..fc1f2f1e5449a12c56b525c92854705e7bd003e6 100644 --- a/src/TNL/Algorithms/Scan.hpp +++ b/src/TNL/Algorithms/Scan.hpp @@ -225,8 +225,8 @@ perform( Vector& v, CudaScanKernelLauncher< Type, RealType, IndexType >::perform( end - begin, - &v[ begin ], // input - &v[ begin ], // output + &v.getData()[ begin ], // input + &v.getData()[ begin ], // output reduction, zero ); #else @@ -251,8 +251,8 @@ performFirstPhase( Vector& v, return CudaScanKernelLauncher< Type, RealType, IndexType >::performFirstPhase( end - begin, - &v[ begin ], // input - &v[ begin ], // output + &v.getData()[ begin ], // input + &v.getData()[ begin ], // output reduction, zero ); #else @@ -279,7 +279,7 @@ performSecondPhase( Vector& v, CudaScanKernelLauncher< Type, RealType, IndexType >::performSecondPhase( end - begin, - &v[ begin ], // output + &v.getData()[ begin ], // output blockShifts.getData(), reduction, shift ); diff --git a/src/TNL/Assert.h b/src/TNL/Assert.h index dc4ba7cf439d4ea25576d75fd1605c4400f6fa23..630abd09fd04272144c47e4bbfeff17f2d6442a4 100644 --- a/src/TNL/Assert.h +++ b/src/TNL/Assert.h @@ -394,7 +394,7 @@ TNL_IMPL_CMP_HELPER_( GT, > ); pred( __TNL_JOIN_STRINGS( val1, op, val2 ), \ msg, __FILE__, __TNL_PRETTY_FUNCTION, __LINE__, \ #val1, #val2, val1, val2 ) - + // Main definitions of the TNL_ASSERT_* macros // unary #define TNL_ASSERT_TRUE( val, msg ) \ diff --git a/src/TNL/Containers/Array.h b/src/TNL/Containers/Array.h index 117cb32ae4a84afb803f84dbf34d54d1948c0f2b..1166245110501aa6c8c66c5a81448ab82895b54d 100644 --- a/src/TNL/Containers/Array.h +++ b/src/TNL/Containers/Array.h @@ -137,6 +137,15 @@ class Array */ explicit Array( const IndexType& size, const AllocatorType& allocator = AllocatorType() ); + /** + * \brief Constructs an array with given size and value. + * + * \param size The number of array elements to be allocated. + * \param value The value all elements will be set to. + * \param allocator The allocator to be associated with this array. + */ + explicit Array( const IndexType& size, const Value& value, const AllocatorType& allocator = AllocatorType() ); + /** * \brief Constructs an array with given size and copies data from given * pointer. @@ -446,7 +455,10 @@ class Array * to the memory space where the array was allocated. For example, if the * array was allocated in the host memory, it can be called only from * host, and if the array was allocated in the device memory, it can be - * called only from device kernels. + * called only from device kernels. If NDEBUG is not defined, assertions + * inside this method perform runtime checks for cross-device memory + * accesses, which would otherwise lead to a segmentation fault. If you only + * need pointer arithmetic, use \e getData instead. * * \param i The index of the element to be accessed. 
* \return Reference to the \e i-th element. @@ -460,7 +472,10 @@ class Array * to the memory space where the array was allocated. For example, if the * array was allocated in the host memory, it can be called only from * host, and if the array was allocated in the device memory, it can be - * called only from device kernels. + * called only from device kernels. If NDEBUG is not defined, assertions + * inside this method perform runtime checks for cross-device memory + * accesses, which would otherwise lead to a segmentation fault. If you only + * need pointer arithmetic, use \e getData instead. * * \param i The index of the element to be accessed. * \return Constant reference to the \e i-th element. diff --git a/src/TNL/Containers/Array.hpp b/src/TNL/Containers/Array.hpp index 24e3f8b43a024c8c8c3b87213a31886c595caceb..ab81db7aabc214ab66751905446849179aa1975a 100644 --- a/src/TNL/Containers/Array.hpp +++ b/src/TNL/Containers/Array.hpp @@ -62,6 +62,18 @@ Array( const IndexType& size, const AllocatorType& allocator ) this->setSize( size ); } +template< typename Value, + typename Device, + typename Index, + typename Allocator > +Array< Value, Device, Index, Allocator >:: +Array( const IndexType& size, const Value& value, const AllocatorType& allocator ) +: allocator( allocator ) +{ + this->setSize( size ); + *this = value; +} + template< typename Value, typename Device, typename Index, @@ -509,6 +521,11 @@ Value& Array< Value, Device, Index, Allocator >:: operator[]( const Index& i ) { +#ifdef __CUDA_ARCH__ + TNL_ASSERT_TRUE( (std::is_same< Device, Devices::Cuda >{}()), "Attempt to access data not allocated on CUDA device from CUDA device." ); +#else + TNL_ASSERT_FALSE( (std::is_same< Device, Devices::Cuda >{}()), "Attempt to access data not allocated on the host from the host." ); +#endif TNL_ASSERT_GE( i, (Index) 0, "Element index must be non-negative." ); TNL_ASSERT_LT( i, this->getSize(), "Element index is out of bounds." ); return this->data[ i ]; @@ -523,6 +540,11 @@ const Value& Array< Value, Device, Index, Allocator >:: operator[]( const Index& i ) const { +#ifdef __CUDA_ARCH__ + TNL_ASSERT_TRUE( (std::is_same< Device, Devices::Cuda >{}()), "Attempt to access data not allocated on CUDA device from CUDA device." ); +#else + TNL_ASSERT_FALSE( (std::is_same< Device, Devices::Cuda >{}()), "Attempt to access data not allocated on the host from the host." ); +#endif TNL_ASSERT_GE( i, (Index) 0, "Element index must be non-negative." ); TNL_ASSERT_LT( i, this->getSize(), "Element index is out of bounds." ); return this->data[ i ]; diff --git a/src/TNL/Containers/ArrayView.h b/src/TNL/Containers/ArrayView.h index c06ad56dcc113541167b9d012ca4caf836a4f5c5..5b9766ffd2b07d00318edff0c3c24080a020d3b6 100644 --- a/src/TNL/Containers/ArrayView.h +++ b/src/TNL/Containers/ArrayView.h @@ -338,7 +338,10 @@ public: * to the memory space where the data was allocated. For example, if the * data was allocated in the host memory, it can be called only from * host, and if the data was allocated in the device memory, it can be - * called only from device kernels. + * called only from device kernels. If NDEBUG is not defined, assertions + * inside this method perform runtime checks for cross-device memory + * accesses, which would otherwise lead to a segmentation fault. If you only + * need pointer arithmetic, use \e getData instead. * * \param i The index of the element to be accessed. * \return Reference to the \e i-th element. @@ -353,7 +356,10 @@ public: * to the memory space where the data was allocated. 
For example, if the * data was allocated in the host memory, it can be called only from * host, and if the data was allocated in the device memory, it can be - * called only from device kernels. + * called only from device kernels. If NDEBUG is not defined, assertions + * inside this method perform runtime checks for cross-device memory + * accesses, which would otherwise lead to a segmentation fault. If you only + * need pointer arithmetic, use \e getData instead. * * \param i The index of the element to be accessed. * \return Constant reference to the \e i-th element. diff --git a/src/TNL/Containers/ArrayView.hpp b/src/TNL/Containers/ArrayView.hpp index c3c39bc10be8dd846331d1086fc1d22b42b8c6c7..e36182cd54acfc17075a73944cb72df7ed3eb042 100644 --- a/src/TNL/Containers/ArrayView.hpp +++ b/src/TNL/Containers/ArrayView.hpp @@ -239,6 +239,11 @@ __cuda_callable__ Value& ArrayView< Value, Device, Index >:: operator[]( Index i ) { +#ifdef __CUDA_ARCH__ + TNL_ASSERT_TRUE( (std::is_same< Device, Devices::Cuda >{}()), "Attempt to access data not allocated on CUDA device from CUDA device." ); +#else + TNL_ASSERT_FALSE( (std::is_same< Device, Devices::Cuda >{}()), "Attempt to access data not allocated on the host from the host." ); +#endif TNL_ASSERT_GE( i, 0, "Element index must be non-negative." ); TNL_ASSERT_LT( i, this->getSize(), "Element index is out of bounds." ); return data[ i ]; @@ -252,6 +257,11 @@ const Value& ArrayView< Value, Device, Index >:: operator[]( Index i ) const { +#ifdef __CUDA_ARCH__ + TNL_ASSERT_TRUE( (std::is_same< Device, Devices::Cuda >{}()), "Attempt to access data not allocated on CUDA device from CUDA device." ); +#else + TNL_ASSERT_FALSE( (std::is_same< Device, Devices::Cuda >{}()), "Attempt to access data not allocated on the host from the host." ); +#endif TNL_ASSERT_GE( i, 0, "Element index must be non-negative." ); TNL_ASSERT_LT( i, this->getSize(), "Element index is out of bounds." 
); return data[ i ]; diff --git a/src/TNL/Containers/Segments/CSR.h b/src/TNL/Containers/Segments/CSR.h new file mode 100644 index 0000000000000000000000000000000000000000..89cad0c6af3d80d9b9b78d336f4dfb95ff69cfc6 --- /dev/null +++ b/src/TNL/Containers/Segments/CSR.h @@ -0,0 +1,133 @@ +/*************************************************************************** + CSR.h - description + ------------------- + begin : Nov 29, 2019 + copyright : (C) 2019 by Tomas Oberhuber + email : tomas.oberhuber@fjfi.cvut.cz + ***************************************************************************/ + +/* See Copyright Notice in tnl/Copyright */ + +#pragma once + +#include <type_traits> + +#include <TNL/Containers/Vector.h> +#include <TNL/Containers/Segments/CSRView.h> +#include <TNL/Containers/Segments/SegmentView.h> + +namespace TNL { + namespace Containers { + namespace Segments { + +template< typename Device, + typename Index, + typename IndexAllocator = typename Allocators::Default< Device >::template Allocator< Index > > +class CSR +{ + public: + + using DeviceType = Device; + using IndexType = Index; + using OffsetsHolder = Containers::Vector< IndexType, DeviceType, typename std::remove_const< IndexType >::type, IndexAllocator >; + using SegmentsSizes = OffsetsHolder; + template< typename Device_, typename Index_ > + using ViewTemplate = CSRView< Device_, Index_ >; + using ViewType = CSRView< Device, Index >; + using ConstViewType = CSRView< Device, std::add_const_t< Index > >; + using SegmentViewType = SegmentView< IndexType, true >; + + CSR(); + + CSR( const SegmentsSizes& sizes ); + + CSR( const CSR& segments ); + + CSR( const CSR&& segments ); + + static String getSerializationType(); + + static String getSegmentsType(); + + /** + * \brief Set sizes of particular segments. + */ + template< typename SizesHolder = OffsetsHolder > + void setSegmentsSizes( const SizesHolder& sizes ); + + ViewType getView(); + + ConstViewType getConstView() const; + + /** + * \brief Number of segments. + */ + __cuda_callable__ + IndexType getSegmentsCount() const; + + /*** + * \brief Returns size of the segment number \e segmentIdx. + */ + __cuda_callable__ + IndexType getSegmentSize( const IndexType segmentIdx ) const; + + /*** + * \brief Returns number of elements managed by all segments. + */ + __cuda_callable__ + IndexType getSize() const; + + /*** + * \brief Returns number of elements that need to be allocated. + */ + __cuda_callable__ + IndexType getStorageSize() const; + + __cuda_callable__ + IndexType getGlobalIndex( const Index segmentIdx, const Index localIdx ) const; + + __cuda_callable__ + void getSegmentAndLocalIndex( const Index globalIdx, Index& segmentIdx, Index& localIdx ) const; + + __cuda_callable__ + SegmentViewType getSegmentView( const IndexType segmentIdx ) const; + + /*** + * \brief Go over all segments and for each segment element call + * function 'f' with arguments 'args'. The return type of 'f' is bool. + * When it is true, the for-loop continues. Once 'f' returns false, the for-loop + * is terminated. + */ + template< typename Function, typename... Args > + void forSegments( IndexType first, IndexType last, Function& f, Args... args ) const; + + template< typename Function, typename... Args > + void forAll( Function& f, Args... args ) const; + + /*** + * \brief Go over all segments and perform a reduction in each of them. + */ + template< typename Fetch, typename Reduction, typename ResultKeeper, typename Real, typename... 
Args > + void segmentsReduction( IndexType first, IndexType last, Fetch& fetch, Reduction& reduction, ResultKeeper& keeper, const Real& zero, Args... args ) const; + + template< typename Fetch, typename Reduction, typename ResultKeeper, typename Real, typename... Args > + void allReduction( Fetch& fetch, Reduction& reduction, ResultKeeper& keeper, const Real& zero, Args... args ) const; + + CSR& operator=( const CSR& rhsSegments ) = default; + + template< typename Device_, typename Index_, typename IndexAllocator_ > + CSR& operator=( const CSR< Device_, Index_, IndexAllocator_ >& source ); + + void save( File& file ) const; + + void load( File& file ); + + protected: + + OffsetsHolder offsets; +}; + } // namespace Segments + } // namespace Containers +} // namespace TNL + +#include <TNL/Containers/Segments/CSR.hpp> diff --git a/src/TNL/Containers/Segments/CSR.hpp b/src/TNL/Containers/Segments/CSR.hpp new file mode 100644 index 0000000000000000000000000000000000000000..9a948b04e2dac7311c6eab9f4149cf779256c59f --- /dev/null +++ b/src/TNL/Containers/Segments/CSR.hpp @@ -0,0 +1,291 @@ +/*************************************************************************** + CSR.hpp - description + ------------------- + begin : Nov 29, 2019 + copyright : (C) 2019 by Tomas Oberhuber + email : tomas.oberhuber@fjfi.cvut.cz + ***************************************************************************/ + +/* See Copyright Notice in tnl/Copyright */ + +#pragma once + +#include <TNL/Containers/Vector.h> +#include <TNL/Algorithms/ParallelFor.h> +#include <TNL/Containers/Segments/CSR.h> +#include <TNL/Containers/Segments/details/CSR.h> + +namespace TNL { + namespace Containers { + namespace Segments { + + +template< typename Device, + typename Index, + typename IndexAllocator > +CSR< Device, Index, IndexAllocator >:: +CSR() +{ +} + +template< typename Device, + typename Index, + typename IndexAllocator > +CSR< Device, Index, IndexAllocator >:: +CSR( const SegmentsSizes& segmentsSizes ) +{ + this->setSegmentsSizes( segmentsSizes ); +} + +template< typename Device, + typename Index, + typename IndexAllocator > +CSR< Device, Index, IndexAllocator >:: +CSR( const CSR& csr ) : offsets( csr.offsets ) +{ +} + +template< typename Device, + typename Index, + typename IndexAllocator > +CSR< Device, Index, IndexAllocator >:: +CSR( const CSR&& csr ) : offsets( std::move( csr.offsets ) ) +{ + +} + +template< typename Device, + typename Index, + typename IndexAllocator > +String +CSR< Device, Index, IndexAllocator >:: +getSerializationType() +{ + return "CSR< [any_device], " + TNL::getSerializationType< IndexType >() + " >"; +} + +template< typename Device, + typename Index, + typename IndexAllocator > +String +CSR< Device, Index, IndexAllocator >:: +getSegmentsType() +{ + return ViewType::getSegmentsType(); +} + +template< typename Device, + typename Index, + typename IndexAllocator > + template< typename SizesHolder > +void +CSR< Device, Index, IndexAllocator >:: +setSegmentsSizes( const SizesHolder& sizes ) +{ + details::CSR< Device, Index >::setSegmentsSizes( sizes, this->offsets ); +} + +template< typename Device, + typename Index, + typename IndexAllocator > +typename CSR< Device, Index, IndexAllocator >::ViewType +CSR< Device, Index, IndexAllocator >:: +getView() +{ + return ViewType( this->offsets.getView() ); +} + +template< typename Device, + typename Index, + typename IndexAllocator > +typename CSR< Device, Index, IndexAllocator >::ConstViewType +CSR< Device, Index, IndexAllocator >:: +getConstView() const +{ + 
return ConstViewType( this->offsets.getConstView() ); +} + +template< typename Device, + typename Index, + typename IndexAllocator > +__cuda_callable__ +Index +CSR< Device, Index, IndexAllocator >:: +getSegmentsCount() const +{ + return this->offsets.getSize() - 1; +} + +template< typename Device, + typename Index, + typename IndexAllocator > +__cuda_callable__ +Index +CSR< Device, Index, IndexAllocator >:: +getSegmentSize( const IndexType segmentIdx ) const +{ + return details::CSR< Device, Index >::getSegmentSize( this->offsets, segmentIdx ); +} + +template< typename Device, + typename Index, + typename IndexAllocator > +__cuda_callable__ +Index +CSR< Device, Index, IndexAllocator >:: +getSize() const +{ + return this->getStorageSize(); +} + +template< typename Device, + typename Index, + typename IndexAllocator > +__cuda_callable__ +Index +CSR< Device, Index, IndexAllocator >:: +getStorageSize() const +{ + return details::CSR< Device, Index >::getStorageSize( this->offsets ); +} + +template< typename Device, + typename Index, + typename IndexAllocator > +__cuda_callable__ +Index +CSR< Device, Index, IndexAllocator >:: +getGlobalIndex( const Index segmentIdx, const Index localIdx ) const +{ + if( ! std::is_same< DeviceType, Devices::Host >::value ) + { +#ifdef __CUDA_ARCH__ + return offsets[ segmentIdx ] + localIdx; +#else + return offsets.getElement( segmentIdx ) + localIdx; +#endif + } + return offsets[ segmentIdx ] + localIdx; +} + +template< typename Device, + typename Index, + typename IndexAllocator > +__cuda_callable__ +void +CSR< Device, Index, IndexAllocator >:: +getSegmentAndLocalIndex( const Index globalIdx, Index& segmentIdx, Index& localIdx ) const +{ +} + +template< typename Device, + typename Index, + typename IndexAllocator > +__cuda_callable__ +auto +CSR< Device, Index, IndexAllocator >:: +getSegmentView( const IndexType segmentIdx ) const -> SegmentViewType +{ + return SegmentViewType( offsets[ segmentIdx ], offsets[ segmentIdx + 1 ] - offsets[ segmentIdx ] ); +} + +template< typename Device, + typename Index, + typename IndexAllocator > + template< typename Function, typename... Args > +void +CSR< Device, Index, IndexAllocator >:: +forSegments( IndexType first, IndexType last, Function& f, Args... args ) const +{ + const auto offsetsView = this->offsets.getConstView(); + auto l = [=] __cuda_callable__ ( const IndexType segmentIdx, Args... args ) mutable { + const IndexType begin = offsetsView[ segmentIdx ]; + const IndexType end = offsetsView[ segmentIdx + 1 ]; + IndexType localIdx( 0 ); + for( IndexType globalIdx = begin; globalIdx < end; globalIdx++ ) + if( ! f( segmentIdx, localIdx++, globalIdx, args... ) ) + break; + }; + Algorithms::ParallelFor< Device >::exec( first, last, l, args... ); +} + +template< typename Device, + typename Index, + typename IndexAllocator> + template< typename Function, typename... Args > +void +CSR< Device, Index, IndexAllocator >:: +forAll( Function& f, Args... args ) const +{ + this->forSegments( 0, this->getSegmentsCount(), f, args... ); +} + +template< typename Device, + typename Index, + typename IndexAllocator > + template< typename Fetch, typename Reduction, typename ResultKeeper, typename Real, typename... Args > +void +CSR< Device, Index, IndexAllocator >:: +segmentsReduction( IndexType first, IndexType last, Fetch& fetch, Reduction& reduction, ResultKeeper& keeper, const Real& zero, Args... args ) const +{ + using RealType = decltype( fetch( IndexType(), IndexType(), IndexType(), std::declval< bool& >(), args... 
) ); + const auto offsetsView = this->offsets.getConstView(); + auto l = [=] __cuda_callable__ ( const IndexType i, Args... args ) mutable { + const IndexType begin = offsetsView[ i ]; + const IndexType end = offsetsView[ i + 1 ]; + RealType aux( zero ); + bool compute( true ); + IndexType localIdx( 0 ); + for( IndexType j = begin; j < end && compute; j++ ) + reduction( aux, fetch( i, localIdx++, j, compute, args... ) ); + keeper( i, aux ); + }; + Algorithms::ParallelFor< Device >::exec( first, last, l, args... ); +} + +template< typename Device, + typename Index, + typename IndexAllocator > + template< typename Fetch, typename Reduction, typename ResultKeeper, typename Real, typename... Args > +void +CSR< Device, Index, IndexAllocator >:: +allReduction( Fetch& fetch, Reduction& reduction, ResultKeeper& keeper, const Real& zero, Args... args ) const +{ + this->segmentsReduction( 0, this->getSegmentsCount(), fetch, reduction, keeper, zero, args... ); +} + +template< typename Device, + typename Index, + typename IndexAllocator > + template< typename Device_, typename Index_, typename IndexAllocator_ > +CSR< Device, Index, IndexAllocator >& +CSR< Device, Index, IndexAllocator >:: +operator=( const CSR< Device_, Index_, IndexAllocator_ >& source ) +{ + this->offsets = source.offsets; + return *this; +} + +template< typename Device, + typename Index, + typename IndexAllocator > +void +CSR< Device, Index, IndexAllocator >:: +save( File& file ) const +{ + file << this->offsets; +} + +template< typename Device, + typename Index, + typename IndexAllocator > +void +CSR< Device, Index, IndexAllocator >:: +load( File& file ) +{ + file >> this->offsets; +} + + } // namespace Segments + } // namespace Conatiners +} // namespace TNL diff --git a/src/TNL/Containers/Segments/CSRView.h b/src/TNL/Containers/Segments/CSRView.h new file mode 100644 index 0000000000000000000000000000000000000000..f7cf815d04eb87e411cd213e0b0ca659c60bbb6c --- /dev/null +++ b/src/TNL/Containers/Segments/CSRView.h @@ -0,0 +1,132 @@ +/*************************************************************************** + CSRView.h - description + ------------------- + begin : Dec 11, 2019 + copyright : (C) 2019 by Tomas Oberhuber + email : tomas.oberhuber@fjfi.cvut.cz + ***************************************************************************/ + +/* See Copyright Notice in tnl/Copyright */ + +#pragma once + +#include <type_traits> + +#include <TNL/Containers/Vector.h> +#include <TNL/Containers/Segments/SegmentView.h> + +namespace TNL { + namespace Containers { + namespace Segments { + +template< typename Device, + typename Index > +class CSRView +{ + public: + + using DeviceType = Device; + using IndexType = Index; + using OffsetsView = typename Containers::VectorView< IndexType, DeviceType, typename std::remove_const< IndexType >::type >; + using ConstOffsetsView = typename Containers::Vector< IndexType, DeviceType, typename std::remove_const< IndexType >::type >::ConstViewType; + using ViewType = CSRView; + template< typename Device_, typename Index_ > + using ViewTemplate = CSRView< Device_, Index_ >; + using ConstViewType = CSRView< Device, std::add_const_t< Index > >; + using SegmentViewType = SegmentView< IndexType >; + + __cuda_callable__ + CSRView(); + + __cuda_callable__ + CSRView( const OffsetsView& offsets ); + + __cuda_callable__ + CSRView( const OffsetsView&& offsets ); + + __cuda_callable__ + CSRView( const CSRView& csr_view ); + + __cuda_callable__ + CSRView( const CSRView&& csr_view ); + + static String 
getSerializationType(); + + static String getSegmentsType(); + + __cuda_callable__ + ViewType getView(); + + __cuda_callable__ + ConstViewType getConstView() const; + + /** + * \brief Number of segments. + */ + __cuda_callable__ + IndexType getSegmentsCount() const; + + /*** + * \brief Returns size of the segment number \e segmentIdx. + */ + __cuda_callable__ + IndexType getSegmentSize( const IndexType segmentIdx ) const; + + /*** + * \brief Returns number of elements managed by all segments. + */ + __cuda_callable__ + IndexType getSize() const; + + /*** + * \brief Returns number of elements that need to be allocated. + */ + __cuda_callable__ + IndexType getStorageSize() const; + + __cuda_callable__ + IndexType getGlobalIndex( const Index segmentIdx, const Index localIdx ) const; + + __cuda_callable__ + void getSegmentAndLocalIndex( const Index globalIdx, Index& segmentIdx, Index& localIdx ) const; + + __cuda_callable__ + SegmentViewType getSegmentView( const IndexType segmentIdx ) const; + + /*** + * \brief Go over all segments and for each segment element call + * function 'f' with arguments 'args'. The return type of 'f' is bool. + * When it is true, the for-loop continues. Once 'f' returns false, the for-loop + * is terminated. + */ + template< typename Function, typename... Args > + void forSegments( IndexType first, IndexType last, Function& f, Args... args ) const; + + template< typename Function, typename... Args > + void forAll( Function& f, Args... args ) const; + + + /*** + * \brief Go over all segments and perform a reduction in each of them. + */ + template< typename Fetch, typename Reduction, typename ResultKeeper, typename Real, typename... Args > + void segmentsReduction( IndexType first, IndexType last, Fetch& fetch, Reduction& reduction, ResultKeeper& keeper, const Real& zero, Args... args ) const; + + template< typename Fetch, typename Reduction, typename ResultKeeper, typename Real, typename... 
args ) const; + + CSRView& operator=( const CSRView& view ); + + void save( File& file ) const; + + void load( File& file ); + + protected: + + OffsetsView offsets; +}; + } // namespace Segments + } // namespace Containers +} // namespace TNL + +#include <TNL/Containers/Segments/CSRView.hpp> diff --git a/src/TNL/Containers/Segments/CSRView.hpp b/src/TNL/Containers/Segments/CSRView.hpp new file mode 100644 index 0000000000000000000000000000000000000000..02be7f099f1fd9446200d8e10340c5a6bdc6afed --- /dev/null +++ b/src/TNL/Containers/Segments/CSRView.hpp @@ -0,0 +1,264 @@ +/*************************************************************************** + CSRView.hpp - description + ------------------- + begin : Dec 11, 2019 + copyright : (C) 2019 by Tomas Oberhuber + email : tomas.oberhuber@fjfi.cvut.cz + ***************************************************************************/ + +/* See Copyright Notice in tnl/Copyright */ + +#pragma once + +#include <TNL/Containers/Vector.h> +#include <TNL/Algorithms/ParallelFor.h> +#include <TNL/Containers/Segments/CSRView.h> +#include <TNL/Containers/Segments/details/CSR.h> + +namespace TNL { + namespace Containers { + namespace Segments { + + +template< typename Device, + typename Index > +__cuda_callable__ +CSRView< Device, Index >:: +CSRView() +{ +} + +template< typename Device, + typename Index > +__cuda_callable__ +CSRView< Device, Index >:: +CSRView( const OffsetsView&& offsets_view ) + : offsets( offsets_view ) +{ +} + +template< typename Device, + typename Index > +__cuda_callable__ +CSRView< Device, Index >:: +CSRView( const CSRView& csr_view ) + : offsets( csr_view.offsets ) +{ +} + +template< typename Device, + typename Index > +__cuda_callable__ +CSRView< Device, Index >:: +CSRView( const CSRView&& csr_view ) + : offsets( std::move( csr_view.offsets ) ) +{ +} + +template< typename Device, + typename Index > +String +CSRView< Device, Index >:: +getSerializationType() +{ + return "CSR< [any_device], " + TNL::getSerializationType< IndexType >() + " >"; +} + +template< typename Device, + typename Index > +String +CSRView< Device, Index >:: +getSegmentsType() +{ + return "CSR"; +} + +template< typename Device, + typename Index > +__cuda_callable__ +typename CSRView< Device, Index >::ViewType +CSRView< Device, Index >:: +getView() +{ + return ViewType( this->offsets ); +} + +template< typename Device, + typename Index > +__cuda_callable__ +typename CSRView< Device, Index >::ConstViewType +CSRView< Device, Index >:: +getConstView() const +{ + return ConstViewType( this->offsets.getConstView() ); +} + +template< typename Device, + typename Index > +__cuda_callable__ +Index +CSRView< Device, Index >:: +getSegmentsCount() const +{ + return this->offsets.getSize() - 1; +} + +template< typename Device, + typename Index > +__cuda_callable__ +Index +CSRView< Device, Index >:: +getSegmentSize( const IndexType segmentIdx ) const +{ + return details::CSR< Device, Index >::getSegmentSize( this->offsets, segmentIdx ); +} + +template< typename Device, + typename Index > +__cuda_callable__ +Index +CSRView< Device, Index >:: +getSize() const +{ + return this->getStorageSize(); +} + +template< typename Device, + typename Index > +__cuda_callable__ +Index +CSRView< Device, Index >:: +getStorageSize() const +{ + return details::CSR< Device, Index >::getStorageSize( this->offsets ); +} + +template< typename Device, + typename Index > +__cuda_callable__ +Index +CSRView< Device, Index >:: +getGlobalIndex( const Index segmentIdx, const Index localIdx ) const +{ + if( ! 
std::is_same< DeviceType, Devices::Host >::value ) + { +#ifdef __CUDA_ARCH__ + return offsets[ segmentIdx ] + localIdx; +#else + return offsets.getElement( segmentIdx ) + localIdx; +#endif + } + return offsets[ segmentIdx ] + localIdx; +} + +template< typename Device, + typename Index > +__cuda_callable__ +void +CSRView< Device, Index >:: +getSegmentAndLocalIndex( const Index globalIdx, Index& segmentIdx, Index& localIdx ) const +{ +} + +template< typename Device, + typename Index > +__cuda_callable__ +auto +CSRView< Device, Index >:: +getSegmentView( const IndexType segmentIdx ) const -> SegmentViewType +{ + return SegmentViewType( offsets[ segmentIdx ], offsets[ segmentIdx + 1 ] - offsets[ segmentIdx ], 1 ); +} + +template< typename Device, + typename Index > + template< typename Function, typename... Args > +void +CSRView< Device, Index >:: +forSegments( IndexType first, IndexType last, Function& f, Args... args ) const +{ + const auto offsetsView = this->offsets; + auto l = [=] __cuda_callable__ ( const IndexType segmentIdx, Args... args ) mutable { + const IndexType begin = offsetsView[ segmentIdx ]; + const IndexType end = offsetsView[ segmentIdx + 1 ]; + IndexType localIdx( 0 ); + bool compute( true ); + for( IndexType globalIdx = begin; globalIdx < end && compute; globalIdx++ ) + f( segmentIdx, localIdx++, globalIdx, compute, args... ); + }; + Algorithms::ParallelFor< Device >::exec( first, last, l, args... ); +} + +template< typename Device, + typename Index > + template< typename Function, typename... Args > +void +CSRView< Device, Index >:: +forAll( Function& f, Args... args ) const +{ + this->forSegments( 0, this->getSegmentsCount(), f, args... ); +} + +template< typename Device, + typename Index > + template< typename Fetch, typename Reduction, typename ResultKeeper, typename Real, typename... Args > +void +CSRView< Device, Index >:: +segmentsReduction( IndexType first, IndexType last, Fetch& fetch, Reduction& reduction, ResultKeeper& keeper, const Real& zero, Args... args ) const +{ + using RealType = decltype( fetch( IndexType(), IndexType(), IndexType(), std::declval< bool& >(), args... ) ); + const auto offsetsView = this->offsets.getConstView(); + auto l = [=] __cuda_callable__ ( const IndexType i, Args... args ) mutable { + const IndexType begin = offsetsView[ i ]; + const IndexType end = offsetsView[ i + 1 ]; + RealType aux( zero ); + IndexType localIdx( 0 ); + bool compute( true ); + for( IndexType j = begin; j < end && compute; j++ ) + reduction( aux, fetch( i, localIdx++, j, compute, args... ) ); + keeper( i, aux ); + }; + Algorithms::ParallelFor< Device >::exec( first, last, l, args... ); +} + +template< typename Device, + typename Index > + template< typename Fetch, typename Reduction, typename ResultKeeper, typename Real, typename... Args > +void +CSRView< Device, Index >:: +allReduction( Fetch& fetch, Reduction& reduction, ResultKeeper& keeper, const Real& zero, Args... args ) const +{ + this->segmentsReduction( 0, this->getSegmentsCount(), fetch, reduction, keeper, zero, args... 
); +} + +template< typename Device, + typename Index > +CSRView< Device, Index >& +CSRView< Device, Index >:: +operator=( const CSRView& view ) +{ + this->offsets.bind( view.offsets ); + return *this; +} + +template< typename Device, + typename Index > +void +CSRView< Device, Index >:: +save( File& file ) const +{ + file << this->offsets; +} + +template< typename Device, + typename Index > +void +CSRView< Device, Index >:: +load( File& file ) +{ + file >> this->offsets; +} + + } // namespace Segments + } // namespace Conatiners +} // namespace TNL diff --git a/src/TNL/Containers/Segments/Ellpack.h b/src/TNL/Containers/Segments/Ellpack.h new file mode 100644 index 0000000000000000000000000000000000000000..a1188a854e952f15f1f2449cc33c9535ccfac10a --- /dev/null +++ b/src/TNL/Containers/Segments/Ellpack.h @@ -0,0 +1,131 @@ +/*************************************************************************** + Ellpack.h - description + ------------------- + begin : Dec 3, 2019 + copyright : (C) 2019 by Tomas Oberhuber + email : tomas.oberhuber@fjfi.cvut.cz + ***************************************************************************/ + +/* See Copyright Notice in tnl/Copyright */ + +#pragma once + +#include <TNL/Containers/Vector.h> +#include <TNL/Containers/Segments/EllpackView.h> +#include <TNL/Containers/Segments/SegmentView.h> + +namespace TNL { + namespace Containers { + namespace Segments { + +template< typename Device, + typename Index, + typename IndexAllocator = typename Allocators::Default< Device >::template Allocator< Index >, + bool RowMajorOrder = std::is_same< Device, Devices::Host >::value, + int Alignment = 32 > +class Ellpack +{ + public: + + using DeviceType = Device; + using IndexType = Index; + static constexpr int getAlignment() { return Alignment; } + static constexpr bool getRowMajorOrder() { return RowMajorOrder; } + using OffsetsHolder = Containers::Vector< IndexType, DeviceType, IndexType >; + using SegmentsSizes = OffsetsHolder; + template< typename Device_, typename Index_ > + using ViewTemplate = EllpackView< Device_, Index_, RowMajorOrder, Alignment >; + using ViewType = EllpackView< Device, Index, RowMajorOrder, Alignment >; + //using ConstViewType = EllpackView< Device, std::add_const_t< Index >, RowMajorOrder, Alignment >; + using SegmentViewType = SegmentView< IndexType, RowMajorOrder >; + + Ellpack(); + + Ellpack( const SegmentsSizes& sizes ); + + Ellpack( const IndexType segmentsCount, const IndexType segmentSize ); + + Ellpack( const Ellpack& segments ); + + Ellpack( const Ellpack&& segments ); + + static String getSerializationType(); + + static String getSegmentsType(); + + ViewType getView(); + + //ConstViewType getConstView() const; + + /** + * \brief Set sizes of particular segments. + */ + template< typename SizesHolder = OffsetsHolder > + void setSegmentsSizes( const SizesHolder& sizes ); + + void setSegmentsSizes( const IndexType segmentsCount, const IndexType segmentSize ); + /** + * \brief Number segments. 
+ */ + __cuda_callable__ + IndexType getSegmentsCount() const; + + __cuda_callable__ + IndexType getSegmentSize( const IndexType segmentIdx ) const; + + __cuda_callable__ + IndexType getSize() const; + + __cuda_callable__ + IndexType getStorageSize() const; + + __cuda_callable__ + IndexType getGlobalIndex( const Index segmentIdx, const Index localIdx ) const; + + __cuda_callable__ + void getSegmentAndLocalIndex( const Index globalIdx, Index& segmentIdx, Index& localIdx ) const; + + __cuda_callable__ + SegmentViewType getSegmentView( const IndexType segmentIdx ) const; + + /*** + * \brief Go over all segments and for each segment element call + * function 'f' with arguments 'args'. The return type of 'f' is bool. + * When its true, the for-loop continues. Once 'f' returns false, the for-loop + * is terminated. + */ + template< typename Function, typename... Args > + void forSegments( IndexType first, IndexType last, Function& f, Args... args ) const; + + template< typename Function, typename... Args > + void forAll( Function& f, Args... args ) const; + + + /*** + * \brief Go over all segments and perform a reduction in each of them. + */ + template< typename Fetch, typename Reduction, typename ResultKeeper, typename Real, typename... Args > + void segmentsReduction( IndexType first, IndexType last, Fetch& fetch, Reduction& reduction, ResultKeeper& keeper, const Real& zero, Args... args ) const; + + template< typename Fetch, typename Reduction, typename ResultKeeper, typename Real, typename... Args > + void allReduction( Fetch& fetch, Reduction& reduction, ResultKeeper& keeper, const Real& zero, Args... args ) const; + + Ellpack& operator=( const Ellpack& source ) = default; + + template< typename Device_, typename Index_, typename IndexAllocator_, bool RowMajorOrder_, int Alignment_ > + Ellpack& operator=( const Ellpack< Device_, Index_, IndexAllocator_, RowMajorOrder_, Alignment_ >& source ); + + void save( File& file ) const; + + void load( File& file ); + + protected: + + IndexType segmentSize, size, alignedSize; +}; + + } // namespace Segements + } // namespace Containers +} // namespace TNL + +#include <TNL/Containers/Segments/Ellpack.hpp> diff --git a/src/TNL/Containers/Segments/Ellpack.hpp b/src/TNL/Containers/Segments/Ellpack.hpp new file mode 100644 index 0000000000000000000000000000000000000000..9c59c5529eada436df075bd130ddaa16f0ef20ea --- /dev/null +++ b/src/TNL/Containers/Segments/Ellpack.hpp @@ -0,0 +1,412 @@ +/*************************************************************************** + Ellpack.hpp - description + ------------------- + begin : Dec 3, 2019 + copyright : (C) 2019 by Tomas Oberhuber + email : tomas.oberhuber@fjfi.cvut.cz + ***************************************************************************/ + +/* See Copyright Notice in tnl/Copyright */ + +#pragma once + +#include <TNL/Containers/Vector.h> +#include <TNL/Algorithms/ParallelFor.h> +#include <TNL/Containers/Segments/Ellpack.h> + +namespace TNL { + namespace Containers { + namespace Segments { + + +template< typename Device, + typename Index, + typename IndexAllocator, + bool RowMajorOrder, + int Alignment > +Ellpack< Device, Index, IndexAllocator, RowMajorOrder, Alignment >:: +Ellpack() + : segmentSize( 0 ), size( 0 ), alignedSize( 0 ) +{ +} + +template< typename Device, + typename Index, + typename IndexAllocator, + bool RowMajorOrder, + int Alignment > +Ellpack< Device, Index, IndexAllocator, RowMajorOrder, Alignment >:: +Ellpack( const SegmentsSizes& segmentsSizes ) + : segmentSize( 0 ), size( 0 ), 
alignedSize( 0 ) +{ + this->setSegmentsSizes( segmentsSizes ); +} + +template< typename Device, + typename Index, + typename IndexAllocator, + bool RowMajorOrder, + int Alignment > +Ellpack< Device, Index, IndexAllocator, RowMajorOrder, Alignment >:: +Ellpack( const IndexType segmentsCount, const IndexType segmentSize ) + : segmentSize( 0 ), size( 0 ), alignedSize( 0 ) +{ + this->setSegmentsSizes( segmentsCount, segmentSize ); +} + +template< typename Device, + typename Index, + typename IndexAllocator, + bool RowMajorOrder, + int Alignment > +Ellpack< Device, Index, IndexAllocator, RowMajorOrder, Alignment >:: +Ellpack( const Ellpack& ellpack ) + : segmentSize( ellpack.segmentSize ), size( ellpack.size ), alignedSize( ellpack.alignedSize ) +{ +} + +template< typename Device, + typename Index, + typename IndexAllocator, + bool RowMajorOrder, + int Alignment > +Ellpack< Device, Index, IndexAllocator, RowMajorOrder, Alignment >:: +Ellpack( const Ellpack&& ellpack ) + : segmentSize( ellpack.segmentSize ), size( ellpack.size ), alignedSize( ellpack.alignedSize ) +{ +} + +template< typename Device, + typename Index, + typename IndexAllocator, + bool RowMajorOrder, + int Alignment > +String +Ellpack< Device, Index, IndexAllocator, RowMajorOrder, Alignment >:: +getSerializationType() +{ + return "Ellpack< [any_device], " + TNL::getSerializationType< IndexType >() + " >"; +} + +template< typename Device, + typename Index, + typename IndexAllocator, + bool RowMajorOrder, + int Alignment > +String +Ellpack< Device, Index, IndexAllocator, RowMajorOrder, Alignment >:: +getSegmentsType() +{ + return ViewType::getSegmentsType(); +} + +template< typename Device, + typename Index, + typename IndexAllocator, + bool RowMajorOrder, + int Alignment > +typename Ellpack< Device, Index, IndexAllocator, RowMajorOrder, Alignment >::ViewType +Ellpack< Device, Index, IndexAllocator, RowMajorOrder, Alignment >:: +getView() +{ + return ViewType( segmentSize, size, alignedSize ); +} + +/*template< typename Device, + typename Index, + bool RowMajorOrder, + int Alignment > +typename Ellpack< Device, Index, IndexAllocator, RowMajorOrder, Alignment >::ConstViewType +Ellpack< Device, Index, IndexAllocator, RowMajorOrder, Alignment >:: +getConstView() const +{ + return ConstViewType( segmentSize, size, alignedSize ); +}*/ + +template< typename Device, + typename Index, + typename IndexAllocator, + bool RowMajorOrder, + int Alignment > + template< typename SizesHolder > +void +Ellpack< Device, Index, IndexAllocator, RowMajorOrder, Alignment >:: +setSegmentsSizes( const SizesHolder& sizes ) +{ + this->segmentSize = max( sizes ); + this->size = sizes.getSize(); + if( RowMajorOrder ) + this->alignedSize = this->size; + else + this->alignedSize = roundUpDivision( size, this->getAlignment() ) * this->getAlignment(); +} + +template< typename Device, + typename Index, + typename IndexAllocator, + bool RowMajorOrder, + int Alignment > +void +Ellpack< Device, Index, IndexAllocator, RowMajorOrder, Alignment >:: +setSegmentsSizes( const IndexType segmentsCount, const IndexType segmentSize ) +{ + this->segmentSize = segmentSize; + this->size = segmentsCount; + if( RowMajorOrder ) + this->alignedSize = this->size; + else + this->alignedSize = roundUpDivision( size, this->getAlignment() ) * this->getAlignment(); +} + + +template< typename Device, + typename Index, + typename IndexAllocator, + bool RowMajorOrder, + int Alignment > +__cuda_callable__ +Index +Ellpack< Device, Index, IndexAllocator, RowMajorOrder, Alignment >:: 
+getSegmentsCount() const +{ + return this->size; +} + +template< typename Device, + typename Index, + typename IndexAllocator, + bool RowMajorOrder, + int Alignment > +__cuda_callable__ +Index +Ellpack< Device, Index, IndexAllocator, RowMajorOrder, Alignment >:: +getSegmentSize( const IndexType segmentIdx ) const +{ + return this->segmentSize; +} + +template< typename Device, + typename Index, + typename IndexAllocator, + bool RowMajorOrder, + int Alignment > +__cuda_callable__ +Index +Ellpack< Device, Index, IndexAllocator, RowMajorOrder, Alignment >:: +getSize() const +{ + return this->size * this->segmentSize; +} + + +template< typename Device, + typename Index, + typename IndexAllocator, + bool RowMajorOrder, + int Alignment > +__cuda_callable__ +Index +Ellpack< Device, Index, IndexAllocator, RowMajorOrder, Alignment >:: +getStorageSize() const +{ + return this->alignedSize * this->segmentSize; +} + +template< typename Device, + typename Index, + typename IndexAllocator, + bool RowMajorOrder, + int Alignment > +__cuda_callable__ +Index +Ellpack< Device, Index, IndexAllocator, RowMajorOrder, Alignment >:: +getGlobalIndex( const Index segmentIdx, const Index localIdx ) const +{ + if( RowMajorOrder ) + return segmentIdx * this->segmentSize + localIdx; + else + return segmentIdx + this->alignedSize * localIdx; +} + +template< typename Device, + typename Index, + typename IndexAllocator, + bool RowMajorOrder, + int Alignment > +__cuda_callable__ +void +Ellpack< Device, Index, IndexAllocator, RowMajorOrder, Alignment >:: +getSegmentAndLocalIndex( const Index globalIdx, Index& segmentIdx, Index& localIdx ) const +{ +} + +template< typename Device, + typename Index, + typename IndexAllocator, + bool RowMajorOrder, + int Alignment > +__cuda_callable__ +auto +Ellpack< Device, Index, IndexAllocator, RowMajorOrder, Alignment >:: +getSegmentView( const IndexType segmentIdx ) const -> SegmentViewType +{ + if( RowMajorOrder ) + return SegmentViewType( segmentIdx * this->segmentSize, this->segmentSize, 1 ); + else + return SegmentViewType( segmentIdx, this->segmentSize, this->alignedSize ); +} + +template< typename Device, + typename Index, + typename IndexAllocator, + bool RowMajorOrder, + int Alignment > + template< typename Function, typename... Args > +void +Ellpack< Device, Index, IndexAllocator, RowMajorOrder, Alignment >:: +forSegments( IndexType first, IndexType last, Function& f, Args... args ) const +{ + if( RowMajorOrder ) + { + const IndexType segmentSize = this->segmentSize; + auto l = [=] __cuda_callable__ ( const IndexType segmentIdx, Args... args ) mutable { + const IndexType begin = segmentIdx * segmentSize; + const IndexType end = begin + segmentSize; + IndexType localIdx( 0 ); + for( IndexType globalIdx = begin; globalIdx < end; globalIdx++ ) + if( ! f( segmentIdx, localIdx++, globalIdx, args... ) ) + break; + }; + Algorithms::ParallelFor< Device >::exec( first, last, l, args... ); + } + else + { + const IndexType storageSize = this->getStorageSize(); + const IndexType alignedSize = this->alignedSize; + auto l = [=] __cuda_callable__ ( const IndexType segmentIdx, Args... args ) mutable { + const IndexType begin = segmentIdx; + const IndexType end = storageSize; + IndexType localIdx( 0 ); + for( IndexType globalIdx = begin; globalIdx < end; globalIdx += alignedSize ) + if( ! f( segmentIdx, localIdx++, globalIdx, args... ) ) + break; + }; + Algorithms::ParallelFor< Device >::exec( first, last, l, args... 
); + } +} + +template< typename Device, + typename Index, + typename IndexAllocator, + bool RowMajorOrder, + int Alignment > + template< typename Function, typename... Args > +void +Ellpack< Device, Index, IndexAllocator, RowMajorOrder, Alignment >:: +forAll( Function& f, Args... args ) const +{ + this->forSegments( 0, this->getSegmentsCount(), f, args... ); +} + +template< typename Device, + typename Index, + typename IndexAllocator, + bool RowMajorOrder, + int Alignment > + template< typename Fetch, typename Reduction, typename ResultKeeper, typename Real, typename... Args > +void +Ellpack< Device, Index, IndexAllocator, RowMajorOrder, Alignment >:: +segmentsReduction( IndexType first, IndexType last, Fetch& fetch, Reduction& reduction, ResultKeeper& keeper, const Real& zero, Args... args ) const +{ + using RealType = decltype( fetch( IndexType(), IndexType(), IndexType(), std::declval< bool& >(), args... ) ); + if( RowMajorOrder ) + { + const IndexType segmentSize = this->segmentSize; + auto l = [=] __cuda_callable__ ( const IndexType i, Args... args ) mutable { + const IndexType begin = i * segmentSize; + const IndexType end = begin + segmentSize; + RealType aux( zero ); + bool compute( true ); + for( IndexType j = begin, localIdx = 0; j < end && compute; j++, localIdx++ ) + reduction( aux, fetch( i, localIdx, j, compute, args... ) ); + keeper( i, aux ); + }; + Algorithms::ParallelFor< Device >::exec( first, last, l, args... ); + } + else + { + const IndexType storageSize = this->getStorageSize(); + const IndexType alignedSize = this->alignedSize; + auto l = [=] __cuda_callable__ ( const IndexType i, Args... args ) mutable { + const IndexType begin = i; + const IndexType end = storageSize; + RealType aux( zero ); + bool compute( true ); + for( IndexType j = begin, localIdx = 0; j < end && compute; j += alignedSize, localIdx++ ) + reduction( aux, fetch( i, localIdx, j, compute, args... ) ); + keeper( i, aux ); + }; + Algorithms::ParallelFor< Device >::exec( first, last, l, args... ); + } +} + +template< typename Device, + typename Index, + typename IndexAllocator, + bool RowMajorOrder, + int Alignment > + template< typename Fetch, typename Reduction, typename ResultKeeper, typename Real, typename... Args > +void +Ellpack< Device, Index, IndexAllocator, RowMajorOrder, Alignment >:: +allReduction( Fetch& fetch, Reduction& reduction, ResultKeeper& keeper, const Real& zero, Args... args ) const +{ + this->segmentsReduction( 0, this->getSegmentsCount(), fetch, reduction, keeper, zero, args... 
); +} + +template< typename Device, + typename Index, + typename IndexAllocator, + bool RowMajorOrder, + int Alignment > + template< typename Device_, typename Index_, typename IndexAllocator_, bool RowMajorOrder_, int Alignment_ > +Ellpack< Device, Index, IndexAllocator, RowMajorOrder, Alignment >& +Ellpack< Device, Index, IndexAllocator, RowMajorOrder, Alignment >:: +operator=( const Ellpack< Device_, Index_, IndexAllocator_, RowMajorOrder_, Alignment_ >& source ) +{ + this->segmentSize = source.segmentSize; + this->size = source.size; + this->alignedSize = roundUpDivision( size, this->getAlignment() ) * this->getAlignment(); + return *this; +} + +template< typename Device, + typename Index, + typename IndexAllocator, + bool RowMajorOrder, + int Alignment > +void +Ellpack< Device, Index, IndexAllocator, RowMajorOrder, Alignment >:: +save( File& file ) const +{ + file.save( &segmentSize ); + file.save( &size ); + file.save( &alignedSize ); +} + +template< typename Device, + typename Index, + typename IndexAllocator, + bool RowMajorOrder, + int Alignment > +void +Ellpack< Device, Index, IndexAllocator, RowMajorOrder, Alignment >:: +load( File& file ) +{ + file.load( &segmentSize ); + file.load( &size ); + file.load( &alignedSize ); +} + + } // namespace Segments + } // namespace Conatiners +} // namespace TNL diff --git a/src/TNL/Containers/Segments/EllpackView.h b/src/TNL/Containers/Segments/EllpackView.h new file mode 100644 index 0000000000000000000000000000000000000000..10a89bd7bafd7de62a8e5f37f567478a3d4af1ee --- /dev/null +++ b/src/TNL/Containers/Segments/EllpackView.h @@ -0,0 +1,126 @@ +/*************************************************************************** + EllpackView.h - description + ------------------- + begin : Dec 12, 2019 + copyright : (C) 2019 by Tomas Oberhuber + email : tomas.oberhuber@fjfi.cvut.cz + ***************************************************************************/ + +/* See Copyright Notice in tnl/Copyright */ + +#pragma once + +#include <type_traits> + +#include <TNL/Containers/Vector.h> +#include <TNL/Containers/Segments/SegmentView.h> + + +namespace TNL { + namespace Containers { + namespace Segments { + +template< typename Device, + typename Index, + bool RowMajorOrder = std::is_same< Device, Devices::Host >::value, + int Alignment = 32 > +class EllpackView +{ + public: + + using DeviceType = Device; + using IndexType = Index; + static constexpr int getAlignment() { return Alignment; } + static constexpr bool getRowMajorOrder() { return RowMajorOrder; } + using OffsetsHolder = Containers::Vector< IndexType, DeviceType, IndexType >; + using SegmentsSizes = OffsetsHolder; + template< typename Device_, typename Index_ > + using ViewTemplate = EllpackView< Device_, Index_, RowMajorOrder, Alignment >; + using ViewType = EllpackView; + using ConstViewType = EllpackView< Device, std::add_const_t< Index > >; + using SegmentViewType = SegmentView< IndexType, RowMajorOrder >; + + __cuda_callable__ + EllpackView(); + + __cuda_callable__ + EllpackView( IndexType segmentSize, IndexType size, IndexType alignedSize ); + + __cuda_callable__ + EllpackView( const EllpackView& ellpackView ); + + __cuda_callable__ + EllpackView( const EllpackView&& ellpackView ); + + static String getSerializationType(); + + static String getSegmentsType(); + + __cuda_callable__ + ViewType getView(); + + __cuda_callable__ + ConstViewType getConstView() const; + + /** + * \brief Number segments. 
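+ *
+ * For an Ellpack view this equals the 'size' argument passed to the constructor;
+ * all segments share the same segment size.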
+ */ + __cuda_callable__ + IndexType getSegmentsCount() const; + + __cuda_callable__ + IndexType getSegmentSize( const IndexType segmentIdx ) const; + + __cuda_callable__ + IndexType getSize() const; + + __cuda_callable__ + IndexType getStorageSize() const; + + __cuda_callable__ + IndexType getGlobalIndex( const Index segmentIdx, const Index localIdx ) const; + + __cuda_callable__ + void getSegmentAndLocalIndex( const Index globalIdx, Index& segmentIdx, Index& localIdx ) const; + + __cuda_callable__ + SegmentViewType getSegmentView( const IndexType segmentIdx ) const; + + /*** + * \brief Go over all segments and for each segment element call + * function 'f' with arguments 'args'. The return type of 'f' is bool. + * When its true, the for-loop continues. Once 'f' returns false, the for-loop + * is terminated. + */ + template< typename Function, typename... Args > + void forSegments( IndexType first, IndexType last, Function& f, Args... args ) const; + + template< typename Function, typename... Args > + void forAll( Function& f, Args... args ) const; + + + /*** + * \brief Go over all segments and perform a reduction in each of them. + */ + template< typename Fetch, typename Reduction, typename ResultKeeper, typename Real, typename... Args > + void segmentsReduction( IndexType first, IndexType last, Fetch& fetch, Reduction& reduction, ResultKeeper& keeper, const Real& zero, Args... args ) const; + + template< typename Fetch, typename Reduction, typename ResultKeeper, typename Real, typename... Args > + void allReduction( Fetch& fetch, Reduction& reduction, ResultKeeper& keeper, const Real& zero, Args... args ) const; + + EllpackView& operator=( const EllpackView& view ); + + void save( File& file ) const; + + void load( File& file ); + + protected: + + IndexType segmentSize, size, alignedSize; +}; + + } // namespace Segements + } // namespace Conatiners +} // namespace TNL + +#include <TNL/Containers/Segments/EllpackView.hpp> diff --git a/src/TNL/Containers/Segments/EllpackView.hpp b/src/TNL/Containers/Segments/EllpackView.hpp new file mode 100644 index 0000000000000000000000000000000000000000..84086f380bfb12ac86113f82a76e40db1fbabdef --- /dev/null +++ b/src/TNL/Containers/Segments/EllpackView.hpp @@ -0,0 +1,349 @@ +/*************************************************************************** + EllpackView.hpp - description + ------------------- + begin : Dec 12, 2019 + copyright : (C) 2019 by Tomas Oberhuber + email : tomas.oberhuber@fjfi.cvut.cz + ***************************************************************************/ + +/* See Copyright Notice in tnl/Copyright */ + +#pragma once + +#include <TNL/Containers/Vector.h> +#include <TNL/Algorithms/ParallelFor.h> +#include <TNL/Containers/Segments/EllpackView.h> + +namespace TNL { + namespace Containers { + namespace Segments { + + +template< typename Device, + typename Index, + bool RowMajorOrder, + int Alignment > +__cuda_callable__ +EllpackView< Device, Index, RowMajorOrder, Alignment >:: +EllpackView() + : segmentSize( 0 ), size( 0 ), alignedSize( 0 ) +{ +} + +template< typename Device, + typename Index, + bool RowMajorOrder, + int Alignment > +__cuda_callable__ +EllpackView< Device, Index, RowMajorOrder, Alignment >:: +EllpackView( IndexType segmentSize, IndexType size, IndexType alignedSize ) + : segmentSize( segmentSize ), size( size ), alignedSize( alignedSize ) +{ +} + +template< typename Device, + typename Index, + bool RowMajorOrder, + int Alignment > +__cuda_callable__ +EllpackView< Device, Index, RowMajorOrder, Alignment >:: 
+EllpackView( const EllpackView& ellpack ) + : segmentSize( ellpack.segmentSize ), size( ellpack.size ), alignedSize( ellpack.alignedSize ) +{ +} + +template< typename Device, + typename Index, + bool RowMajorOrder, + int Alignment > +__cuda_callable__ +EllpackView< Device, Index, RowMajorOrder, Alignment >:: +EllpackView( const EllpackView&& ellpack ) + : segmentSize( ellpack.segmentSize ), size( ellpack.size ), alignedSize( ellpack.alignedSize ) +{ +} + +template< typename Device, + typename Index, + bool RowMajorOrder, + int Alignment > +String +EllpackView< Device, Index, RowMajorOrder, Alignment >:: +getSerializationType() +{ + return "Ellpack< [any_device], " + TNL::getSerializationType< IndexType >() + " >"; +} + +template< typename Device, + typename Index, + bool RowMajorOrder, + int Alignment > +String +EllpackView< Device, Index, RowMajorOrder, Alignment >:: +getSegmentsType() +{ + return "Ellpack"; +} + +template< typename Device, + typename Index, + bool RowMajorOrder, + int Alignment > +__cuda_callable__ +typename EllpackView< Device, Index, RowMajorOrder, Alignment >::ViewType +EllpackView< Device, Index, RowMajorOrder, Alignment >:: +getView() +{ + return ViewType( segmentSize, size, alignedSize ); +} + +template< typename Device, + typename Index, + bool RowMajorOrder, + int Alignment > +__cuda_callable__ +typename EllpackView< Device, Index, RowMajorOrder, Alignment >::ConstViewType +EllpackView< Device, Index, RowMajorOrder, Alignment >:: +getConstView() const +{ + return ConstViewType( segmentSize, size, alignedSize ); +} + +template< typename Device, + typename Index, + bool RowMajorOrder, + int Alignment > +__cuda_callable__ +Index +EllpackView< Device, Index, RowMajorOrder, Alignment >:: +getSegmentsCount() const +{ + return this->size; +} + +template< typename Device, + typename Index, + bool RowMajorOrder, + int Alignment > +__cuda_callable__ +Index +EllpackView< Device, Index, RowMajorOrder, Alignment >:: +getSegmentSize( const IndexType segmentIdx ) const +{ + return this->segmentSize; +} + +template< typename Device, + typename Index, + bool RowMajorOrder, + int Alignment > +__cuda_callable__ +Index +EllpackView< Device, Index, RowMajorOrder, Alignment >:: +getSize() const +{ + return this->size * this->segmentSize; +} + + +template< typename Device, + typename Index, + bool RowMajorOrder, + int Alignment > +__cuda_callable__ +Index +EllpackView< Device, Index, RowMajorOrder, Alignment >:: +getStorageSize() const +{ + return this->alignedSize * this->segmentSize; +} + +template< typename Device, + typename Index, + bool RowMajorOrder, + int Alignment > +__cuda_callable__ +Index +EllpackView< Device, Index, RowMajorOrder, Alignment >:: +getGlobalIndex( const Index segmentIdx, const Index localIdx ) const +{ + if( RowMajorOrder ) + return segmentIdx * this->segmentSize + localIdx; + else + return segmentIdx + this->alignedSize * localIdx; +} + +template< typename Device, + typename Index, + bool RowMajorOrder, + int Alignment > +__cuda_callable__ +void +EllpackView< Device, Index, RowMajorOrder, Alignment >:: +getSegmentAndLocalIndex( const Index globalIdx, Index& segmentIdx, Index& localIdx ) const +{ +} + +template< typename Device, + typename Index, + bool RowMajorOrder, + int Alignment > +__cuda_callable__ +auto +EllpackView< Device, Index, RowMajorOrder, Alignment >:: +getSegmentView( const IndexType segmentIdx ) const -> SegmentViewType +{ + if( RowMajorOrder ) + return SegmentViewType( segmentIdx * this->segmentSize, this->segmentSize, 1 ); + else + return 
SegmentViewType( segmentIdx, this->segmentSize, this->alignedSize ); +} + +template< typename Device, + typename Index, + bool RowMajorOrder, + int Alignment > + template< typename Function, typename... Args > +void +EllpackView< Device, Index, RowMajorOrder, Alignment >:: +forSegments( IndexType first, IndexType last, Function& f, Args... args ) const +{ + if( RowMajorOrder ) + { + const IndexType segmentSize = this->segmentSize; + auto l = [=] __cuda_callable__ ( const IndexType segmentIdx, Args... args ) mutable { + const IndexType begin = segmentIdx * segmentSize; + const IndexType end = begin + segmentSize; + IndexType localIdx( 0 ); + bool compute( true ); + for( IndexType globalIdx = begin; globalIdx < end && compute; globalIdx++ ) + f( segmentIdx, localIdx++, globalIdx, compute, args... ); + }; + Algorithms::ParallelFor< Device >::exec( first, last, l, args... ); + } + else + { + const IndexType storageSize = this->getStorageSize(); + const IndexType alignedSize = this->alignedSize; + auto l = [=] __cuda_callable__ ( const IndexType segmentIdx, Args... args ) mutable { + const IndexType begin = segmentIdx; + const IndexType end = storageSize; + IndexType localIdx( 0 ); + bool compute( true ); + for( IndexType globalIdx = begin; globalIdx < end && compute; globalIdx += alignedSize ) + f( segmentIdx, localIdx++, globalIdx, compute, args... ); + }; + Algorithms::ParallelFor< Device >::exec( first, last, l, args... ); + } +} + +template< typename Device, + typename Index, + bool RowMajorOrder, + int Alignment > + template< typename Function, typename... Args > +void +EllpackView< Device, Index, RowMajorOrder, Alignment >:: +forAll( Function& f, Args... args ) const +{ + this->forSegments( 0, this->getSegmentsCount(), f, args... ); +} + +template< typename Device, + typename Index, + bool RowMajorOrder, + int Alignment > + template< typename Fetch, typename Reduction, typename ResultKeeper, typename Real, typename... Args > +void +EllpackView< Device, Index, RowMajorOrder, Alignment >:: +segmentsReduction( IndexType first, IndexType last, Fetch& fetch, Reduction& reduction, ResultKeeper& keeper, const Real& zero, Args... args ) const +{ + using RealType = decltype( fetch( IndexType(), IndexType(), IndexType(), std::declval< bool& >(), args... ) ); + if( RowMajorOrder ) + { + const IndexType segmentSize = this->segmentSize; + auto l = [=] __cuda_callable__ ( const IndexType i, Args... args ) mutable { + const IndexType begin = i * segmentSize; + const IndexType end = begin + segmentSize; + RealType aux( zero ); + IndexType localIdx( 0 ); + bool compute( true ); + for( IndexType j = begin; j < end && compute; j++ ) + reduction( aux, fetch( i, localIdx++, j, compute, args... ) ); + keeper( i, aux ); + }; + Algorithms::ParallelFor< Device >::exec( first, last, l, args... ); + } + else + { + const IndexType storageSize = this->getStorageSize(); + const IndexType alignedSize = this->alignedSize; + auto l = [=] __cuda_callable__ ( const IndexType i, Args... args ) mutable { + const IndexType begin = i; + const IndexType end = storageSize; + RealType aux( zero ); + IndexType localIdx( 0 ); + bool compute( true ); + for( IndexType j = begin; j < end && compute; j += alignedSize ) + reduction( aux, fetch( i, localIdx++, j, compute, args... ) ); + keeper( i, aux ); + }; + Algorithms::ParallelFor< Device >::exec( first, last, l, args... 
); + } +} + +template< typename Device, + typename Index, + bool RowMajorOrder, + int Alignment > + template< typename Fetch, typename Reduction, typename ResultKeeper, typename Real, typename... Args > +void +EllpackView< Device, Index, RowMajorOrder, Alignment >:: +allReduction( Fetch& fetch, Reduction& reduction, ResultKeeper& keeper, const Real& zero, Args... args ) const +{ + this->segmentsReduction( 0, this->getSegmentsCount(), fetch, reduction, keeper, zero, args... ); +} + +template< typename Device, + typename Index, + bool RowMajorOrder, + int Alignment > +EllpackView< Device, Index, RowMajorOrder, Alignment >& +EllpackView< Device, Index, RowMajorOrder, Alignment >:: +operator=( const EllpackView< Device, Index, RowMajorOrder, Alignment >& view ) +{ + this->segmentSize = view.segmentSize; + this->size = view.size; + this->alignedSize = view.alignedSize; + return *this; +} + +template< typename Device, + typename Index, + bool RowMajorOrder, + int Alignment > +void +EllpackView< Device, Index, RowMajorOrder, Alignment >:: +save( File& file ) const +{ + file.save( &segmentSize ); + file.save( &size ); + file.save( &alignedSize ); +} + +template< typename Device, + typename Index, + bool RowMajorOrder, + int Alignment > +void +EllpackView< Device, Index, RowMajorOrder, Alignment >:: +load( File& file ) +{ + file.load( &segmentSize ); + file.load( &size ); + file.load( &alignedSize ); +} + + } // namespace Segments + } // namespace Conatiners +} // namespace TNL diff --git a/src/TNL/Containers/Segments/SegmentView.h b/src/TNL/Containers/Segments/SegmentView.h new file mode 100644 index 0000000000000000000000000000000000000000..eeb3f9d244cc2c4126bb8d92c824b5ca7ffc9a67 --- /dev/null +++ b/src/TNL/Containers/Segments/SegmentView.h @@ -0,0 +1,89 @@ +/*************************************************************************** + SegmentView.h - description + ------------------- + begin : Dec 28, 2019 + copyright : (C) 2019 by Tomas Oberhuber + email : tomas.oberhuber@fjfi.cvut.cz + ***************************************************************************/ + +/* See Copyright Notice in tnl/Copyright */ + +#pragma once + +namespace TNL { + namespace Containers { + namespace Segments { + +template< typename Index, + bool RowMajorOrder = false > +class SegmentView; + +template< typename Index > +class SegmentView< Index, false > +{ + public: + + using IndexType = Index; + + __cuda_callable__ + SegmentView( const IndexType offset, + const IndexType size, + const IndexType step ) + : segmentOffset( offset ), segmentSize( size ), step( step ){}; + + __cuda_callable__ + SegmentView( const SegmentView& view ) + : segmentOffset( view.segmentOffset ), segmentSize( view.segmentSize ), step( view.step ){}; + + __cuda_callable__ + IndexType getSize() const + { + return this->segmentSize; + }; + + __cuda_callable__ + IndexType getGlobalIndex( const IndexType localIndex ) const + { + TNL_ASSERT_LT( localIndex, segmentSize, "Local index exceeds segment bounds." 
); + return segmentOffset + localIndex * step; + }; + + protected: + + IndexType segmentOffset, segmentSize, step; +}; + +template< typename Index > +class SegmentView< Index, true > +{ + public: + + using IndexType = Index; + + __cuda_callable__ + SegmentView( const IndexType offset, + const IndexType size, + const IndexType step = 1 ) // For compatibility with previous specialization + : segmentOffset( offset ), segmentSize( size ){}; + + __cuda_callable__ + IndexType getSize() const + { + return this->segmentSize; + }; + + __cuda_callable__ + IndexType getGlobalIndex( const IndexType localIndex ) const + { + TNL_ASSERT_LT( localIndex, segmentSize, "Local index exceeds segment bounds." ); + return segmentOffset + localIndex; + }; + + protected: + + IndexType segmentOffset, segmentSize; +}; + + } //namespace Segments + } //namespace Containers +} //namespace TNL diff --git a/src/TNL/Containers/Segments/SlicedEllpack.h b/src/TNL/Containers/Segments/SlicedEllpack.h new file mode 100644 index 0000000000000000000000000000000000000000..2027f1d78a96d685806f2715257098a38a624800 --- /dev/null +++ b/src/TNL/Containers/Segments/SlicedEllpack.h @@ -0,0 +1,131 @@ +/*************************************************************************** + SlicedEllpack.h - description + ------------------- + begin : Dec 4, 2019 + copyright : (C) 2019 by Tomas Oberhuber + email : tomas.oberhuber@fjfi.cvut.cz + ***************************************************************************/ + +/* See Copyright Notice in tnl/Copyright */ + +#pragma once + +#include <TNL/Allocators/Default.h> +#include <TNL/Containers/Vector.h> +#include <TNL/Containers/Segments/SlicedEllpackView.h> +#include <TNL/Containers/Segments/SegmentView.h> + +namespace TNL { + namespace Containers { + namespace Segments { + +template< typename Device, + typename Index, + typename IndexAllocator = typename Allocators::Default< Device >::template Allocator< Index >, + bool RowMajorOrder = std::is_same< Device, Devices::Host >::value, + int SliceSize = 32 > +class SlicedEllpack +{ + public: + + using DeviceType = Device; + using IndexType = Index; + using OffsetsHolder = Containers::Vector< IndexType, DeviceType, typename std::remove_const< IndexType >::type, IndexAllocator >; + static constexpr int getSliceSize() { return SliceSize; } + static constexpr bool getRowMajorOrder() { return RowMajorOrder; } + using ViewType = SlicedEllpackView< Device, Index, RowMajorOrder, SliceSize >; + template< typename Device_, typename Index_ > + using ViewTemplate = SlicedEllpackView< Device_, Index_, RowMajorOrder, SliceSize >; + using ConstViewType = SlicedEllpackView< Device, std::add_const_t< Index >, RowMajorOrder, SliceSize >; + using SegmentViewType = SegmentView< IndexType, RowMajorOrder >; + + SlicedEllpack(); + + SlicedEllpack( const Vector< IndexType, DeviceType, IndexType >& sizes ); + + SlicedEllpack( const SlicedEllpack& segments ); + + SlicedEllpack( const SlicedEllpack&& segments ); + + static String getSerializationType(); + + static String getSegmentsType(); + + ViewType getView(); + + ConstViewType getConstView() const; + + /** + * \brief Set sizes of particular segments. + */ + template< typename SizesHolder = OffsetsHolder > + void setSegmentsSizes( const SizesHolder& sizes ); + + __cuda_callable__ + IndexType getSegmentsCount() const; + + __cuda_callable__ + IndexType getSegmentSize( const IndexType segmentIdx ) const; + + /** + * \brief Number segments. 
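+ *
+ * The value returned is the total number of elements managed by all segments,
+ * i.e. the sum of the segment sizes; the number of segments itself is returned
+ * by getSegmentsCount().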
+ */ + __cuda_callable__ + IndexType getSize() const; + + + __cuda_callable__ + IndexType getStorageSize() const; + + __cuda_callable__ + IndexType getGlobalIndex( const Index segmentIdx, const Index localIdx ) const; + + __cuda_callable__ + void getSegmentAndLocalIndex( const Index globalIdx, Index& segmentIdx, Index& localIdx ) const; + + __cuda_callable__ + SegmentViewType getSegmentView( const IndexType segmentIdx ) const; + + /*** + * \brief Go over all segments and for each segment element call + * function 'f' with arguments 'args'. The return type of 'f' is bool. + * When its true, the for-loop continues. Once 'f' returns false, the for-loop + * is terminated. + */ + template< typename Function, typename... Args > + void forSegments( IndexType first, IndexType last, Function& f, Args... args ) const; + + template< typename Function, typename... Args > + void forAll( Function& f, Args... args ) const; + + + /*** + * \brief Go over all segments and perform a reduction in each of them. + */ + template< typename Fetch, typename Reduction, typename ResultKeeper, typename Real, typename... Args > + void segmentsReduction( IndexType first, IndexType last, Fetch& fetch, Reduction& reduction, ResultKeeper& keeper, const Real& zero, Args... args ) const; + + template< typename Fetch, typename Reduction, typename ResultKeeper, typename Real, typename... Args > + void allReduction( Fetch& fetch, Reduction& reduction, ResultKeeper& keeper, const Real& zero, Args... args ) const; + + SlicedEllpack& operator=( const SlicedEllpack& source ) = default; + + template< typename Device_, typename Index_, typename IndexAllocator_, bool RowMajorOrder_ > + SlicedEllpack& operator=( const SlicedEllpack< Device_, Index_, IndexAllocator_, RowMajorOrder_, SliceSize >& source ); + + void save( File& file ) const; + + void load( File& file ); + + protected: + + IndexType size, alignedSize, segmentsCount; + + OffsetsHolder sliceOffsets, sliceSegmentSizes; +}; + + } // namespace Segements + } // namespace Conatiners +} // namespace TNL + +#include <TNL/Containers/Segments/SlicedEllpack.hpp> diff --git a/src/TNL/Containers/Segments/SlicedEllpack.hpp b/src/TNL/Containers/Segments/SlicedEllpack.hpp new file mode 100644 index 0000000000000000000000000000000000000000..9ba1276e3eaea3fdf39261c99e7376c6122d4f8b --- /dev/null +++ b/src/TNL/Containers/Segments/SlicedEllpack.hpp @@ -0,0 +1,460 @@ +/*************************************************************************** + SlicedEllpack.hpp - description + ------------------- + begin : Dec 4, 2019 + copyright : (C) 2019 by Tomas Oberhuber + email : tomas.oberhuber@fjfi.cvut.cz + ***************************************************************************/ + +/* See Copyright Notice in tnl/Copyright */ + +#pragma once + +#include <TNL/Containers/Vector.h> +#include <TNL/Algorithms/ParallelFor.h> +#include <TNL/Containers/Segments/SlicedEllpack.h> +#include <TNL/Containers/Segments/Ellpack.h> + +namespace TNL { + namespace Containers { + namespace Segments { + + +template< typename Device, + typename Index, + typename IndexAllocator, + bool RowMajorOrder, + int SliceSize > +SlicedEllpack< Device, Index, IndexAllocator, RowMajorOrder, SliceSize >:: +SlicedEllpack() + : size( 0 ), alignedSize( 0 ), segmentsCount( 0 ) +{ +} + +template< typename Device, + typename Index, + typename IndexAllocator, + bool RowMajorOrder, + int SliceSize > +SlicedEllpack< Device, Index, IndexAllocator, RowMajorOrder, SliceSize >:: +SlicedEllpack( const Vector< IndexType, DeviceType, IndexType >& 
sizes ) + : size( 0 ), alignedSize( 0 ), segmentsCount( 0 ) +{ + this->setSegmentsSizes( sizes ); +} + +template< typename Device, + typename Index, + typename IndexAllocator, + bool RowMajorOrder, + int SliceSize > +SlicedEllpack< Device, Index, IndexAllocator, RowMajorOrder, SliceSize >:: +SlicedEllpack( const SlicedEllpack& slicedEllpack ) + : size( slicedEllpack.size ), alignedSize( slicedEllpack.alignedSize ), + segmentsCount( slicedEllpack.segmentsCount ), sliceOffsets( slicedEllpack.sliceOffsets ), + sliceSegmentSizes( slicedEllpack.sliceSegmentSizes ) +{ +} + +template< typename Device, + typename Index, + typename IndexAllocator, + bool RowMajorOrder, + int SliceSize > +SlicedEllpack< Device, Index, IndexAllocator, RowMajorOrder, SliceSize >:: +SlicedEllpack( const SlicedEllpack&& slicedEllpack ) + : size( slicedEllpack.size ), alignedSize( slicedEllpack.alignedSize ), + segmentsCount( slicedEllpack.segmentsCount ), sliceOffsets( slicedEllpack.sliceOffsets ), + sliceSegmentSizes( slicedEllpack.sliceSegmentSizes ) +{ +} + +template< typename Device, + typename Index, + typename IndexAllocator, + bool RowMajorOrder, + int SliceSize > +String +SlicedEllpack< Device, Index, IndexAllocator, RowMajorOrder, SliceSize >:: +getSerializationType() +{ + return "SlicedEllpack< [any_device], " + TNL::getSerializationType< IndexType >() + " >"; +} + +template< typename Device, + typename Index, + typename IndexAllocator, + bool RowMajorOrder, + int SliceSize > +String +SlicedEllpack< Device, Index, IndexAllocator, RowMajorOrder, SliceSize >:: +getSegmentsType() +{ + return ViewType::getSegmentsType(); +} + +template< typename Device, + typename Index, + typename IndexAllocator, + bool RowMajorOrder, + int SliceSize > +typename SlicedEllpack< Device, Index, IndexAllocator, RowMajorOrder, SliceSize >::ViewType +SlicedEllpack< Device, Index, IndexAllocator, RowMajorOrder, SliceSize >:: +getView() +{ + return ViewType( size, alignedSize, segmentsCount, sliceOffsets.getView(), sliceSegmentSizes.getView() ); +} + +template< typename Device, + typename Index, + typename IndexAllocator, + bool RowMajorOrder, + int SliceSize > +typename SlicedEllpack< Device, Index, IndexAllocator, RowMajorOrder, SliceSize >::ConstViewType +SlicedEllpack< Device, Index, IndexAllocator, RowMajorOrder, SliceSize >:: +getConstView() const +{ + return ConstViewType( size, alignedSize, segmentsCount, sliceOffsets.getConstView(), sliceSegmentSizes.getConstView() ); +} + +template< typename Device, + typename Index, + typename IndexAllocator, + bool RowMajorOrder, + int SliceSize > + template< typename SizesHolder > +void +SlicedEllpack< Device, Index, IndexAllocator, RowMajorOrder, SliceSize >:: +setSegmentsSizes( const SizesHolder& sizes ) +{ + this->segmentsCount = sizes.getSize(); + const IndexType slicesCount = roundUpDivision( this->segmentsCount, getSliceSize() ); + this->sliceOffsets.setSize( slicesCount + 1 ); + this->sliceOffsets = 0; + this->sliceSegmentSizes.setSize( slicesCount ); + Ellpack< DeviceType, IndexType, IndexAllocator, true > ellpack; + ellpack.setSegmentsSizes( slicesCount, SliceSize ); + + const IndexType _size = sizes.getSize(); + const auto sizes_view = sizes.getConstView(); + auto slices_view = this->sliceOffsets.getView(); + auto slice_segment_size_view = this->sliceSegmentSizes.getView(); + auto fetch = [=] __cuda_callable__ ( IndexType segmentIdx, IndexType localIdx, IndexType globalIdx, bool& compute ) -> IndexType { + if( globalIdx < _size ) + return sizes_view[ globalIdx ]; + return 0; + }; + 
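+ // The reduction below computes, for every slice, the maximum of the segment sizes
+ // gathered by 'fetch'; 'keep' stores the padded slice width ( max * SliceSize ) into
+ // sliceOffsets and the maximum itself into sliceSegmentSizes. The exclusive scan that
+ // follows turns the padded widths into the final slice offsets.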
auto reduce = [] __cuda_callable__ ( IndexType& aux, const IndexType i ) {
+ aux = TNL::max( aux, i );
+ };
+ auto keep = [=] __cuda_callable__ ( IndexType i, IndexType res ) mutable {
+ slices_view[ i ] = res * SliceSize;
+ slice_segment_size_view[ i ] = res;
+ };
+ ellpack.allReduction( fetch, reduce, keep, std::numeric_limits< IndexType >::min() );
+ this->sliceOffsets.template scan< Algorithms::ScanType::Exclusive >();
+ this->size = sum( sizes );
+ this->alignedSize = this->sliceOffsets.getElement( slicesCount );
+}
+
+template< typename Device,
+ typename Index,
+ typename IndexAllocator,
+ bool RowMajorOrder,
+ int SliceSize >
+__cuda_callable__
+Index
+SlicedEllpack< Device, Index, IndexAllocator, RowMajorOrder, SliceSize >::
+getSegmentsCount() const
+{
+ return this->segmentsCount;
+}
+
+template< typename Device,
+ typename Index,
+ typename IndexAllocator,
+ bool RowMajorOrder,
+ int SliceSize >
+__cuda_callable__
+Index
+SlicedEllpack< Device, Index, IndexAllocator, RowMajorOrder, SliceSize >::
+getSegmentSize( const IndexType segmentIdx ) const
+{
+ const Index sliceIdx = segmentIdx / SliceSize;
+ if( std::is_same< DeviceType, Devices::Host >::value )
+ return this->sliceSegmentSizes[ sliceIdx ];
+ else
+ {
+#ifdef __CUDA_ARCH__
+ return this->sliceSegmentSizes[ sliceIdx ];
+#else
+ return this->sliceSegmentSizes.getElement( sliceIdx );
+#endif
+ }
+}
+
+template< typename Device,
+ typename Index,
+ typename IndexAllocator,
+ bool RowMajorOrder,
+ int SliceSize >
+__cuda_callable__
+Index
+SlicedEllpack< Device, Index, IndexAllocator, RowMajorOrder, SliceSize >::
+getSize() const
+{
+ return this->size;
+}
+
+template< typename Device,
+ typename Index,
+ typename IndexAllocator,
+ bool RowMajorOrder,
+ int SliceSize >
+__cuda_callable__
+Index
+SlicedEllpack< Device, Index, IndexAllocator, RowMajorOrder, SliceSize >::
+getStorageSize() const
+{
+ return this->alignedSize;
+}
+
+template< typename Device,
+ typename Index,
+ typename IndexAllocator,
+ bool RowMajorOrder,
+ int SliceSize >
+__cuda_callable__
+Index
+SlicedEllpack< Device, Index, IndexAllocator, RowMajorOrder, SliceSize >::
+getGlobalIndex( const Index segmentIdx, const Index localIdx ) const
+{
+ const IndexType sliceIdx = segmentIdx / SliceSize;
+ const IndexType segmentInSliceIdx = segmentIdx % SliceSize;
+ IndexType sliceOffset, segmentSize;
+ if( std::is_same< DeviceType, Devices::Host >::value )
+ {
+ sliceOffset = this->sliceOffsets[ sliceIdx ];
+ segmentSize = this->sliceSegmentSizes[ sliceIdx ];
+ }
+ else
+ {
+#ifdef __CUDA_ARCH__
+ sliceOffset = this->sliceOffsets[ sliceIdx ];
+ segmentSize = this->sliceSegmentSizes[ sliceIdx ];
+#else
+ sliceOffset = this->sliceOffsets.getElement( sliceIdx );
+ segmentSize = this->sliceSegmentSizes.getElement( sliceIdx );
+#endif
+ }
+ if( RowMajorOrder )
+ return sliceOffset + segmentInSliceIdx * segmentSize + localIdx;
+ else
+ return sliceOffset + segmentInSliceIdx + SliceSize * localIdx;
+}
+
+template< typename Device,
+ typename Index,
+ typename IndexAllocator,
+ bool RowMajorOrder,
+ int SliceSize >
+__cuda_callable__
+void
+SlicedEllpack< Device, Index, IndexAllocator, RowMajorOrder, SliceSize >::
+getSegmentAndLocalIndex( const Index globalIdx, Index& segmentIdx, Index& localIdx ) const
+{
+}
+
+template< typename Device,
+ typename Index,
+ typename IndexAllocator,
+ bool RowMajorOrder,
+ int SliceSize >
+__cuda_callable__
+auto
+SlicedEllpack< Device, Index, IndexAllocator, RowMajorOrder, SliceSize >::
+getSegmentView( const IndexType segmentIdx )
const -> SegmentViewType +{ + const IndexType sliceIdx = segmentIdx / SliceSize; + const IndexType segmentInSliceIdx = segmentIdx % SliceSize; + const IndexType& sliceOffset = this->sliceOffsets[ sliceIdx ]; + const IndexType& segmentSize = this->sliceSegmentSizes[ sliceIdx ]; + + if( RowMajorOrder ) + return SegmentViewType( sliceOffset + segmentInSliceIdx * segmentSize, segmentSize, 1 ); + else + return SegmentViewType( sliceOffset + segmentInSliceIdx, segmentSize, SliceSize ); +} + +template< typename Device, + typename Index, + typename IndexAllocator, + bool RowMajorOrder, + int SliceSize > + template< typename Function, typename... Args > +void +SlicedEllpack< Device, Index, IndexAllocator, RowMajorOrder, SliceSize >:: +forSegments( IndexType first, IndexType last, Function& f, Args... args ) const +{ + const auto sliceSegmentSizes_view = this->sliceSegmentSizes.getConstView(); + const auto sliceOffsets_view = this->sliceOffsets.getConstView(); + if( RowMajorOrder ) + { + auto l = [=] __cuda_callable__ ( const IndexType segmentIdx, Args... args ) mutable { + const IndexType sliceIdx = segmentIdx / SliceSize; + const IndexType segmentInSliceIdx = segmentIdx % SliceSize; + const IndexType segmentSize = sliceSegmentSizes_view[ sliceIdx ]; + const IndexType begin = sliceOffsets_view[ sliceIdx ] + segmentInSliceIdx * segmentSize; + const IndexType end = begin + segmentSize; + IndexType localIdx( 0 ); + for( IndexType globalIdx = begin; globalIdx < end; globalIdx++ ) + if( ! f( segmentIdx, localIdx++, globalIdx, args... ) ) + break; + }; + Algorithms::ParallelFor< Device >::exec( first, last, l, args... ); + } + else + { + auto l = [=] __cuda_callable__ ( const IndexType segmentIdx, Args... args ) mutable { + const IndexType sliceIdx = segmentIdx / SliceSize; + const IndexType segmentInSliceIdx = segmentIdx % SliceSize; + const IndexType segmentSize = sliceSegmentSizes_view[ sliceIdx ]; + const IndexType begin = sliceOffsets_view[ sliceIdx ] + segmentInSliceIdx; + const IndexType end = sliceOffsets_view[ sliceIdx + 1 ]; + IndexType localIdx( 0 ); + for( IndexType globalIdx = begin; globalIdx < end; globalIdx += SliceSize ) + if( ! f( segmentIdx, localIdx++, globalIdx, args... ) ) + break; + }; + Algorithms::ParallelFor< Device >::exec( first, last, l, args... ); + } +} + +template< typename Device, + typename Index, + typename IndexAllocator, + bool RowMajorOrder, + int SliceSize > + template< typename Function, typename... Args > +void +SlicedEllpack< Device, Index, IndexAllocator, RowMajorOrder, SliceSize >:: +forAll( Function& f, Args... args ) const +{ + this->forSegments( 0, this->getSegmentsCount(), f, args... ); +} + +template< typename Device, + typename Index, + typename IndexAllocator, + bool RowMajorOrder, + int SliceSize > + template< typename Fetch, typename Reduction, typename ResultKeeper, typename Real, typename... Args > +void +SlicedEllpack< Device, Index, IndexAllocator, RowMajorOrder, SliceSize >:: +segmentsReduction( IndexType first, IndexType last, Fetch& fetch, Reduction& reduction, ResultKeeper& keeper, const Real& zero, Args... args ) const +{ + using RealType = decltype( fetch( IndexType(), IndexType(), IndexType(), std::declval< bool& >(), args... ) ); + const auto sliceSegmentSizes_view = this->sliceSegmentSizes.getConstView(); + const auto sliceOffsets_view = this->sliceOffsets.getConstView(); + if( RowMajorOrder ) + { + auto l = [=] __cuda_callable__ ( const IndexType segmentIdx, Args... 
args ) mutable { + const IndexType sliceIdx = segmentIdx / SliceSize; + const IndexType segmentInSliceIdx = segmentIdx % SliceSize; + const IndexType segmentSize = sliceSegmentSizes_view[ sliceIdx ]; + const IndexType begin = sliceOffsets_view[ sliceIdx ] + segmentInSliceIdx * segmentSize; + const IndexType end = begin + segmentSize; + RealType aux( zero ); + bool compute( true ); + IndexType localIdx( 0 ); + for( IndexType globalIdx = begin; globalIdx< end; globalIdx++ ) + reduction( aux, fetch( segmentIdx, localIdx++, globalIdx, compute, args... ) ); + keeper( segmentIdx, aux ); + }; + Algorithms::ParallelFor< Device >::exec( first, last, l, args... ); + } + else + { + auto l = [=] __cuda_callable__ ( const IndexType segmentIdx, Args... args ) mutable { + const IndexType sliceIdx = segmentIdx / SliceSize; + const IndexType segmentInSliceIdx = segmentIdx % SliceSize; + const IndexType segmentSize = sliceSegmentSizes_view[ sliceIdx ]; + const IndexType begin = sliceOffsets_view[ sliceIdx ] + segmentInSliceIdx; + const IndexType end = sliceOffsets_view[ sliceIdx + 1 ]; + RealType aux( zero ); + bool compute( true ); + IndexType localIdx( 0 ); + for( IndexType globalIdx = begin; globalIdx < end; globalIdx += SliceSize ) + reduction( aux, fetch( segmentIdx, localIdx++, globalIdx, compute, args... ) ); + keeper( segmentIdx, aux ); + }; + Algorithms::ParallelFor< Device >::exec( first, last, l, args... ); + } +} + +template< typename Device, + typename Index, + typename IndexAllocator, + bool RowMajorOrder, + int SliceSize > + template< typename Fetch, typename Reduction, typename ResultKeeper, typename Real, typename... Args > +void +SlicedEllpack< Device, Index, IndexAllocator, RowMajorOrder, SliceSize >:: +allReduction( Fetch& fetch, Reduction& reduction, ResultKeeper& keeper, const Real& zero, Args... args ) const +{ + this->segmentsReduction( 0, this->getSegmentsCount(), fetch, reduction, keeper, zero, args... 
); +} + +template< typename Device, + typename Index, + typename IndexAllocator, + bool RowMajorOrder, + int SliceSize > + template< typename Device_, typename Index_, typename IndexAllocator_, bool RowMajorOrder_ > +SlicedEllpack< Device, Index, IndexAllocator, RowMajorOrder, SliceSize >& +SlicedEllpack< Device, Index, IndexAllocator, RowMajorOrder, SliceSize >:: +operator=( const SlicedEllpack< Device_, Index_, IndexAllocator_, RowMajorOrder_, SliceSize >& source ) +{ + this->size = source.size; + this->alignedSize = source.alignedSize; + this->segmentsCount = source.segmentsCount; + this->sliceOffsets = source.sliceOffsets; + this->sliceSegmentSizes = source.sliceSegmentSizes; + return *this; +} + +template< typename Device, + typename Index, + typename IndexAllocator, + bool RowMajorOrder, + int SliceSize > +void +SlicedEllpack< Device, Index, IndexAllocator, RowMajorOrder, SliceSize >:: +save( File& file ) const +{ + file.save( &size ); + file.save( &alignedSize ); + file.save( &segmentsCount ); + file << this->sliceOffsets; + file << this->sliceSegmentSizes; +} + +template< typename Device, + typename Index, + typename IndexAllocator, + bool RowMajorOrder, + int SliceSize > +void +SlicedEllpack< Device, Index, IndexAllocator, RowMajorOrder, SliceSize >:: +load( File& file ) +{ + file.load( &size ); + file.load( &alignedSize ); + file.load( &segmentsCount ); + file >> this->sliceOffsets; + file >> this->sliceSegmentSizes; +} + + } // namespace Segments + } // namespace Conatiners +} // namespace TNL diff --git a/src/TNL/Containers/Segments/SlicedEllpackView.h b/src/TNL/Containers/Segments/SlicedEllpackView.h new file mode 100644 index 0000000000000000000000000000000000000000..6e2e55bbc3ef040c9b15f42a41b7e0a4bc14f7d7 --- /dev/null +++ b/src/TNL/Containers/Segments/SlicedEllpackView.h @@ -0,0 +1,130 @@ +/*************************************************************************** + SlicedEllpackView.h - description + ------------------- + begin : Dec 12, 2019 + copyright : (C) 2019 by Tomas Oberhuber + email : tomas.oberhuber@fjfi.cvut.cz + ***************************************************************************/ + +/* See Copyright Notice in tnl/Copyright */ + +#pragma once + +#include <type_traits> + +#include <TNL/Containers/Vector.h> +#include <TNL/Containers/Segments/SegmentView.h> + +namespace TNL { + namespace Containers { + namespace Segments { + +template< typename Device, + typename Index, + bool RowMajorOrder = std::is_same< Device, Devices::Host >::value, + int SliceSize = 32 > +class SlicedEllpackView +{ + public: + + using DeviceType = Device; + using IndexType = Index; + using OffsetsView = typename Containers::VectorView< IndexType, DeviceType, typename std::remove_const < IndexType >::type >; + static constexpr int getSliceSize() { return SliceSize; } + static constexpr bool getRowMajorOrder() { return RowMajorOrder; } + template< typename Device_, typename Index_ > + using ViewTemplate = SlicedEllpackView< Device_, Index_, RowMajorOrder, SliceSize >; + using ViewType = SlicedEllpackView; + using ConstViewType = SlicedEllpackView< Device, std::add_const_t< Index > >; + using SegmentViewType = SegmentView< IndexType, RowMajorOrder >; + + __cuda_callable__ + SlicedEllpackView(); + + __cuda_callable__ + SlicedEllpackView( IndexType size, + IndexType alignedSize, + IndexType segmentsCount, + OffsetsView&& sliceOffsets, + OffsetsView&& sliceSegmentSizes ); + + __cuda_callable__ + SlicedEllpackView( const SlicedEllpackView& slicedEllpackView ); + + __cuda_callable__ + 
SlicedEllpackView( const SlicedEllpackView&& slicedEllpackView );
+
+ static String getSerializationType();
+
+ static String getSegmentsType();
+
+ __cuda_callable__
+ ViewType getView();
+
+ __cuda_callable__
+ ConstViewType getConstView() const;
+
+ __cuda_callable__
+ IndexType getSegmentsCount() const;
+
+ __cuda_callable__
+ IndexType getSegmentSize( const IndexType segmentIdx ) const;
+
+ /**
+ * \brief Returns the number of elements managed by all the segments.
+ */
+ __cuda_callable__
+ IndexType getSize() const;
+
+ __cuda_callable__
+ IndexType getStorageSize() const;
+
+ __cuda_callable__
+ IndexType getGlobalIndex( const Index segmentIdx, const Index localIdx ) const;
+
+ __cuda_callable__
+ void getSegmentAndLocalIndex( const Index globalIdx, Index& segmentIdx, Index& localIdx ) const;
+
+ __cuda_callable__
+ SegmentViewType getSegmentView( const IndexType segmentIdx ) const;
+
+ /***
+ * \brief Go over all segments and for each segment element call
+ * function 'f' with arguments 'args'. The function 'f' also receives a
+ * reference to a bool 'compute'; setting it to false terminates the
+ * traversal of the current segment.
+ */
+ template< typename Function, typename... Args >
+ void forSegments( IndexType first, IndexType last, Function& f, Args... args ) const;
+
+ template< typename Function, typename... Args >
+ void forAll( Function& f, Args... args ) const;
+
+
+ /***
+ * \brief Go over all segments and perform a reduction in each of them.
+ */
+ template< typename Fetch, typename Reduction, typename ResultKeeper, typename Real, typename... Args >
+ void segmentsReduction( IndexType first, IndexType last, Fetch& fetch, Reduction& reduction, ResultKeeper& keeper, const Real& zero, Args... args ) const;
+
+ template< typename Fetch, typename Reduction, typename ResultKeeper, typename Real, typename... Args >
+ void allReduction( Fetch& fetch, Reduction& reduction, ResultKeeper& keeper, const Real& zero, Args... args ) const;
+
+ SlicedEllpackView& operator=( const SlicedEllpackView& view );
+
+ void save( File& file ) const;
+
+ void load( File& file );
+
+ protected:
+
+ IndexType size, alignedSize, segmentsCount;
+
+ OffsetsView sliceOffsets, sliceSegmentSizes;
+};
+
+ } // namespace Segments
+ } // namespace Containers
+} // namespace TNL
+
+#include <TNL/Containers/Segments/SlicedEllpackView.hpp>
diff --git a/src/TNL/Containers/Segments/SlicedEllpackView.hpp b/src/TNL/Containers/Segments/SlicedEllpackView.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..c4e03aada22f877f1b6d0e14498193de8df9cdbd
--- /dev/null
+++ b/src/TNL/Containers/Segments/SlicedEllpackView.hpp
@@ -0,0 +1,408 @@
+/***************************************************************************
+ SlicedEllpackView.hpp - description
+ -------------------
+ begin : Dec 4, 2019
+ copyright : (C) 2019 by Tomas Oberhuber
+ email : tomas.oberhuber@fjfi.cvut.cz
+ ***************************************************************************/
+
+/* See Copyright Notice in tnl/Copyright */
+
+#pragma once
+
+#include <TNL/Containers/Vector.h>
+#include <TNL/Algorithms/ParallelFor.h>
+#include <TNL/Containers/Segments/SlicedEllpackView.h>
+
+namespace TNL {
+ namespace Containers {
+ namespace Segments {
+
+
+template< typename Device,
+ typename Index,
+ bool RowMajorOrder,
+ int SliceSize >
+__cuda_callable__
+SlicedEllpackView< Device, Index, RowMajorOrder, SliceSize >::
+SlicedEllpackView()
+ : size( 0 ), alignedSize( 0 ), segmentsCount( 0 )
+{
+}
+
+template< typename Device,
+ typename Index,
+ bool RowMajorOrder,
+ int SliceSize >
+__cuda_callable__
+SlicedEllpackView< Device, Index, RowMajorOrder, SliceSize >::
+SlicedEllpackView( IndexType size,
+ IndexType alignedSize,
+ IndexType segmentsCount,
+ OffsetsView&& sliceOffsets,
+ OffsetsView&& sliceSegmentSizes )
+ : size( size ), alignedSize( alignedSize ), segmentsCount( segmentsCount ),
+ sliceOffsets( std::forward< OffsetsView >( sliceOffsets ) ), sliceSegmentSizes( std::forward< OffsetsView >( sliceSegmentSizes ) )
+{
+}
+
+template< typename Device,
+ typename Index,
+ bool RowMajorOrder,
+ int SliceSize >
+__cuda_callable__
+SlicedEllpackView< Device, Index, RowMajorOrder, SliceSize >::
+SlicedEllpackView( const SlicedEllpackView& slicedEllpackView )
+ : size( slicedEllpackView.size ), alignedSize( slicedEllpackView.alignedSize ),
+ segmentsCount( slicedEllpackView.segmentsCount ), sliceOffsets( slicedEllpackView.sliceOffsets ),
+ sliceSegmentSizes( slicedEllpackView.sliceSegmentSizes )
+{
+}
+
+template< typename Device,
+ typename Index,
+ bool RowMajorOrder,
+ int SliceSize >
+__cuda_callable__
+SlicedEllpackView< Device, Index, RowMajorOrder, SliceSize >::
+SlicedEllpackView( const SlicedEllpackView&& slicedEllpackView )
+ : size( slicedEllpackView.size ), alignedSize( slicedEllpackView.alignedSize ),
+ segmentsCount( slicedEllpackView.segmentsCount ), sliceOffsets( slicedEllpackView.sliceOffsets ),
+ sliceSegmentSizes( slicedEllpackView.sliceSegmentSizes )
+{
+}
+
+template< typename Device,
+ typename Index,
+ bool RowMajorOrder,
+ int SliceSize >
+String
+SlicedEllpackView< Device, Index, RowMajorOrder, SliceSize >::
+getSerializationType()
+{
+ return "SlicedEllpack< [any_device], " + TNL::getSerializationType< IndexType >() + " >";
+}
+
+template< typename Device,
+ typename Index,
+ bool RowMajorOrder,
+ int SliceSize >
+String
+SlicedEllpackView< Device, Index, RowMajorOrder, SliceSize >::
+getSegmentsType()
+{
+ return "SlicedEllpack"; +} + +template< typename Device, + typename Index, + bool RowMajorOrder, + int SliceSize > +__cuda_callable__ +typename SlicedEllpackView< Device, Index, RowMajorOrder, SliceSize >::ViewType +SlicedEllpackView< Device, Index, RowMajorOrder, SliceSize >:: +getView() +{ + return ViewType( size, alignedSize, segmentsCount, sliceOffsets, sliceSegmentSizes ); +} + +template< typename Device, + typename Index, + bool RowMajorOrder, + int SliceSize > +__cuda_callable__ +typename SlicedEllpackView< Device, Index, RowMajorOrder, SliceSize >::ConstViewType +SlicedEllpackView< Device, Index, RowMajorOrder, SliceSize >:: +getConstView() const +{ + return ConstViewType( size, alignedSize, segmentsCount, sliceOffsets.getConstView(), sliceSegmentSizes.getConstView() ); +} + +template< typename Device, + typename Index, + bool RowMajorOrder, + int SliceSize > +__cuda_callable__ +Index +SlicedEllpackView< Device, Index, RowMajorOrder, SliceSize >:: +getSegmentsCount() const +{ + return this->segmentsCount; +} + +template< typename Device, + typename Index, + bool RowMajorOrder, + int SliceSize > +__cuda_callable__ +Index +SlicedEllpackView< Device, Index, RowMajorOrder, SliceSize >:: +getSegmentSize( const IndexType segmentIdx ) const +{ + const Index sliceIdx = segmentIdx / SliceSize; + if( std::is_same< DeviceType, Devices::Host >::value ) + return this->sliceSegmentSizes[ sliceIdx ]; + else + { +#ifdef __CUDA_ARCH__ + return this->sliceSegmentSizes[ sliceIdx ]; +#else + return this->sliceSegmentSizes.getElement( sliceIdx ); +#endif + } +} + +template< typename Device, + typename Index, + bool RowMajorOrder, + int SliceSize > +__cuda_callable__ +Index +SlicedEllpackView< Device, Index, RowMajorOrder, SliceSize >:: +getSize() const +{ + return this->size; +} + +template< typename Device, + typename Index, + bool RowMajorOrder, + int SliceSize > +__cuda_callable__ +Index +SlicedEllpackView< Device, Index, RowMajorOrder, SliceSize >:: +getStorageSize() const +{ + return this->alignedSize; +} + +template< typename Device, + typename Index, + bool RowMajorOrder, + int SliceSize > +__cuda_callable__ +Index +SlicedEllpackView< Device, Index, RowMajorOrder, SliceSize >:: +getGlobalIndex( const Index segmentIdx, const Index localIdx ) const +{ + const IndexType sliceIdx = segmentIdx / SliceSize; + const IndexType segmentInSliceIdx = segmentIdx % SliceSize; + IndexType sliceOffset, segmentSize; + if( std::is_same< DeviceType, Devices::Host >::value ) + { + sliceOffset = this->sliceOffsets[ sliceIdx ]; + segmentSize = this->sliceSegmentSizes[ sliceIdx ]; + } + else + { +#ifdef __CUDA_ARCH__ + sliceOffset = this->sliceOffsets[ sliceIdx ]; + segmentSize = this->sliceSegmentSizes[ sliceIdx ]; +#else + sliceOffset = this->sliceOffsets.getElement( sliceIdx ); + segmentSize = this->sliceSegmentSizes.getElement( sliceIdx ); +#endif + } + if( RowMajorOrder ) + return sliceOffset + segmentInSliceIdx * segmentSize + localIdx; + else + return sliceOffset + segmentInSliceIdx + SliceSize * localIdx; +} + +template< typename Device, + typename Index, + bool RowMajorOrder, + int SliceSize > +__cuda_callable__ +void +SlicedEllpackView< Device, Index, RowMajorOrder, SliceSize >:: +getSegmentAndLocalIndex( const Index globalIdx, Index& segmentIdx, Index& localIdx ) const +{ +} + +template< typename Device, + typename Index, + bool RowMajorOrder, + int SliceSize > +__cuda_callable__ +auto +SlicedEllpackView< Device, Index, RowMajorOrder, SliceSize >:: +getSegmentView( const IndexType segmentIdx ) const -> 
SegmentViewType +{ + const IndexType sliceIdx = segmentIdx / SliceSize; + const IndexType segmentInSliceIdx = segmentIdx % SliceSize; + const IndexType& sliceOffset = this->sliceOffsets[ sliceIdx ]; + const IndexType& segmentSize = this->sliceSegmentSizes[ sliceIdx ]; + + if( RowMajorOrder ) + return SegmentViewType( sliceOffset + segmentInSliceIdx * segmentSize, segmentSize, 1 ); + else + return SegmentViewType( sliceOffset + segmentInSliceIdx, segmentSize, SliceSize ); +} + +template< typename Device, + typename Index, + bool RowMajorOrder, + int SliceSize > + template< typename Function, typename... Args > +void +SlicedEllpackView< Device, Index, RowMajorOrder, SliceSize >:: +forSegments( IndexType first, IndexType last, Function& f, Args... args ) const +{ + const auto sliceSegmentSizes_view = this->sliceSegmentSizes.getConstView(); + const auto sliceOffsets_view = this->sliceOffsets.getConstView(); + if( RowMajorOrder ) + { + auto l = [=] __cuda_callable__ ( const IndexType segmentIdx, Args... args ) mutable { + const IndexType sliceIdx = segmentIdx / SliceSize; + const IndexType segmentInSliceIdx = segmentIdx % SliceSize; + const IndexType segmentSize = sliceSegmentSizes_view[ sliceIdx ]; + const IndexType begin = sliceOffsets_view[ sliceIdx ] + segmentInSliceIdx * segmentSize; + const IndexType end = begin + segmentSize; + IndexType localIdx( 0 ); + bool compute( true ); + for( IndexType globalIdx = begin; globalIdx < end && compute; globalIdx++ ) + f( segmentIdx, localIdx++, globalIdx, compute, args... ); + }; + Algorithms::ParallelFor< Device >::exec( first, last, l, args... ); + } + else + { + auto l = [=] __cuda_callable__ ( const IndexType segmentIdx, Args... args ) mutable { + const IndexType sliceIdx = segmentIdx / SliceSize; + const IndexType segmentInSliceIdx = segmentIdx % SliceSize; + const IndexType segmentSize = sliceSegmentSizes_view[ sliceIdx ]; + const IndexType begin = sliceOffsets_view[ sliceIdx ] + segmentInSliceIdx; + const IndexType end = sliceOffsets_view[ sliceIdx + 1 ]; + IndexType localIdx( 0 ); + bool compute( true ); + for( IndexType globalIdx = begin; globalIdx < end && compute; globalIdx += SliceSize ) + f( segmentIdx, localIdx++, globalIdx, compute, args... ); + }; + Algorithms::ParallelFor< Device >::exec( first, last, l, args... ); + } +} + +template< typename Device, + typename Index, + bool RowMajorOrder, + int SliceSize > + template< typename Function, typename... Args > +void +SlicedEllpackView< Device, Index, RowMajorOrder, SliceSize >:: +forAll( Function& f, Args... args ) const +{ + this->forSegments( 0, this->getSegmentsCount(), f, args... ); +} + +template< typename Device, + typename Index, + bool RowMajorOrder, + int SliceSize > + template< typename Fetch, typename Reduction, typename ResultKeeper, typename Real, typename... Args > +void +SlicedEllpackView< Device, Index, RowMajorOrder, SliceSize >:: +segmentsReduction( IndexType first, IndexType last, Fetch& fetch, Reduction& reduction, ResultKeeper& keeper, const Real& zero, Args... args ) const +{ + using RealType = decltype( fetch( IndexType(), IndexType(), IndexType(), std::declval< bool& >(), args... ) ); + const auto sliceSegmentSizes_view = this->sliceSegmentSizes.getConstView(); + const auto sliceOffsets_view = this->sliceOffsets.getConstView(); + if( RowMajorOrder ) + { + auto l = [=] __cuda_callable__ ( const IndexType segmentIdx, Args... 
args ) mutable { + const IndexType sliceIdx = segmentIdx / SliceSize; + const IndexType segmentInSliceIdx = segmentIdx % SliceSize; + const IndexType segmentSize = sliceSegmentSizes_view[ sliceIdx ]; + const IndexType begin = sliceOffsets_view[ sliceIdx ] + segmentInSliceIdx * segmentSize; + const IndexType end = begin + segmentSize; + RealType aux( zero ); + IndexType localIdx( 0 ); + bool compute( true ); + for( IndexType globalIdx = begin; globalIdx< end; globalIdx++ ) + reduction( aux, fetch( segmentIdx, localIdx++, globalIdx, compute, args... ) ); + keeper( segmentIdx, aux ); + }; + Algorithms::ParallelFor< Device >::exec( first, last, l, args... ); + } + else + { + auto l = [=] __cuda_callable__ ( const IndexType segmentIdx, Args... args ) mutable { + const IndexType sliceIdx = segmentIdx / SliceSize; + const IndexType segmentInSliceIdx = segmentIdx % SliceSize; + const IndexType segmentSize = sliceSegmentSizes_view[ sliceIdx ]; + const IndexType begin = sliceOffsets_view[ sliceIdx ] + segmentInSliceIdx; + const IndexType end = sliceOffsets_view[ sliceIdx + 1 ]; + RealType aux( zero ); + IndexType localIdx( 0 ); + bool compute( true ); + for( IndexType globalIdx = begin; globalIdx < end; globalIdx += SliceSize ) + reduction( aux, fetch( segmentIdx, localIdx++, globalIdx, compute, args... ) ); + keeper( segmentIdx, aux ); + }; + Algorithms::ParallelFor< Device >::exec( first, last, l, args... ); + } +} + +template< typename Device, + typename Index, + bool RowMajorOrder, + int SliceSize > + template< typename Fetch, typename Reduction, typename ResultKeeper, typename Real, typename... Args > +void +SlicedEllpackView< Device, Index, RowMajorOrder, SliceSize >:: +allReduction( Fetch& fetch, Reduction& reduction, ResultKeeper& keeper, const Real& zero, Args... args ) const +{ + this->segmentsReduction( 0, this->getSegmentsCount(), fetch, reduction, keeper, zero, args... 
); +} + +template< typename Device, + typename Index, + bool RowMajorOrder, + int SliceSize > +SlicedEllpackView< Device, Index, RowMajorOrder, SliceSize >& +SlicedEllpackView< Device, Index, RowMajorOrder, SliceSize >:: +operator=( const SlicedEllpackView< Device, Index, RowMajorOrder, SliceSize >& view ) +{ + this->size = view.size; + this->alignedSize = view.alignedSize; + this->segmentsCount = view.segmentsCount; + this->sliceOffsets.bind( view.sliceOffsets ); + this->sliceSegmentSizes.bind( view.sliceSegmentSizes ); + return *this; +} + +template< typename Device, + typename Index, + bool RowMajorOrder, + int SliceSize > +void +SlicedEllpackView< Device, Index, RowMajorOrder, SliceSize >:: +save( File& file ) const +{ + file.save( &size ); + file.save( &alignedSize ); + file.save( &segmentsCount ); + file << this->sliceOffsets; + file << this->sliceSegmentSizes; +} + +template< typename Device, + typename Index, + bool RowMajorOrder, + int SliceSize > +void +SlicedEllpackView< Device, Index, RowMajorOrder, SliceSize >:: +load( File& file ) +{ + file.load( &size ); + file.load( &alignedSize ); + file.load( &segmentsCount ); + file >> this->sliceOffsets; + file >> this->sliceSegmentSizes; +} + + } // namespace Segments + } // namespace Containers +} // namespace TNL
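The forSegments and segmentsReduction methods implemented above are the traversal core of the new segments API: forSegments visits every element of the selected segments, while segmentsReduction folds the fetched value of each element with a user-supplied reduction and hands the per-segment result to a keeper functor. A minimal host-side sketch of how this interface could be used to count the elements stored in each segment follows; it is not part of the patch, the header path and the owning SlicedEllpack class are assumed to mirror the view interface above, and the names segmentSizes, result, fetch, reduce and keep are illustrative only.

   // Sketch only (assumed interface): count the elements of every segment.
   #include <TNL/Containers/Vector.h>
   #include <TNL/Containers/Segments/SlicedEllpack.h>   // assumed header location

   void countSegmentElements()
   {
      using Segments = TNL::Containers::Segments::SlicedEllpack< TNL::Devices::Host, int >;
      TNL::Containers::Vector< int, TNL::Devices::Host, int > segmentSizes( 4 );
      segmentSizes = 3;                                  // four segments, three elements each
      Segments segments( segmentSizes );

      TNL::Containers::Vector< int, TNL::Devices::Host, int > result( segments.getSegmentsCount() );
      auto result_view = result.getView();

      // fetch contributes 1 per visited element, reduce sums the contributions,
      // keep stores the per-segment total (__cuda_callable__ would be added for Devices::Cuda)
      auto fetch = [=] ( int segmentIdx, int localIdx, int globalIdx, bool& compute ) -> int { return 1; };
      auto reduce = [] ( int& a, const int& b ) { a += b; };
      auto keep = [=] ( int segmentIdx, const int& value ) mutable { result_view[ segmentIdx ] = value; };
      segments.allReduction( fetch, reduce, keep, 0 );   // result == { 3, 3, 3, 3 }
   }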
diff --git a/src/TNL/Containers/Segments/details/CSR.h b/src/TNL/Containers/Segments/details/CSR.h new file mode 100644 index 0000000000000000000000000000000000000000..38f097669150b7e3f929bdeab3beb1af03ce3e7d --- /dev/null +++ b/src/TNL/Containers/Segments/details/CSR.h @@ -0,0 +1,112 @@ +/*************************************************************************** + CSR.h - description + ------------------- + begin : Dec 12, 2019 + copyright : (C) 2019 by Tomas Oberhuber + email : tomas.oberhuber@fjfi.cvut.cz + ***************************************************************************/ + +/* See Copyright Notice in tnl/Copyright */ + +#pragma once + + +namespace TNL { + namespace Containers { + namespace Segments { + namespace details { + +template< typename Device, + typename Index > +class CSR +{ + public: + + using DeviceType = Device; + using IndexType = Index; + + template< typename SizesHolder, typename CSROffsets > + static void setSegmentsSizes( const SizesHolder& sizes, CSROffsets& offsets ) + { + offsets.setSize( sizes.getSize() + 1 ); + auto view = offsets.getView( 0, sizes.getSize() ); + view = sizes; + offsets.setElement( sizes.getSize(), 0 ); + offsets.template scan< Algorithms::ScanType::Exclusive >(); + } + + template< typename CSROffsets > + __cuda_callable__ + static IndexType getSegmentsCount( const CSROffsets& offsets ) + { + return offsets.getSize() - 1; + } + + /*** + * \brief Returns the size of the segment number \e segmentIdx. + */ + template< typename CSROffsets > + __cuda_callable__ + static IndexType getSegmentSize( const CSROffsets& offsets, const IndexType segmentIdx ) + { + if( ! std::is_same< DeviceType, Devices::Host >::value ) + { +#ifdef __CUDA_ARCH__ + return offsets[ segmentIdx + 1 ] - offsets[ segmentIdx ]; +#else + return offsets.getElement( segmentIdx + 1 ) - offsets.getElement( segmentIdx ); +#endif + } + return offsets[ segmentIdx + 1 ] - offsets[ segmentIdx ]; + } + + /*** + * \brief Returns the number of elements that need to be allocated. + */ + template< typename CSROffsets > + __cuda_callable__ + static IndexType getStorageSize( const CSROffsets& offsets ) + { + if( ! std::is_same< DeviceType, Devices::Host >::value ) + { +#ifdef __CUDA_ARCH__ + return offsets[ getSegmentsCount( offsets ) ]; +#else + return offsets.getElement( getSegmentsCount( offsets ) ); +#endif + } + return offsets[ getSegmentsCount( offsets ) ]; + } + + __cuda_callable__ + IndexType getGlobalIndex( const Index segmentIdx, const Index localIdx ) const; + + __cuda_callable__ + void getSegmentAndLocalIndex( const Index globalIdx, Index& segmentIdx, Index& localIdx ) const; + + /*** + * \brief Go over all segments and for each segment element call + * function 'f' with arguments 'args'. The return type of 'f' is bool. + * When it's true, the for-loop continues. Once 'f' returns false, the for-loop + * is terminated. + */ + template< typename Function, typename... Args > + void forSegments( IndexType first, IndexType last, Function& f, Args... args ) const; + + template< typename Function, typename... Args > + void forAll( Function& f, Args... args ) const; + + + /*** + * \brief Go over all segments and perform a reduction in each of them. + */ + template< typename Fetch, typename Reduction, typename ResultKeeper, typename Real, typename... Args > + void segmentsReduction( IndexType first, IndexType last, Fetch& fetch, Reduction& reduction, ResultKeeper& keeper, const Real& zero, Args... args ) const; + + template< typename Fetch, typename Reduction, typename ResultKeeper, typename Real, typename... Args > + void allReduction( Fetch& fetch, Reduction& reduction, ResultKeeper& keeper, const Real& zero, Args... args ) const; +}; + } // namespace details + } // namespace Segments + } // namespace Containers +} // namespace TNL
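The static helpers above define the CSR layout: setSegmentsSizes copies the per-segment sizes into an offsets array one element longer, appends a zero and runs an exclusive scan, so that offsets[ i ] is the position of the first element of segment i and the last offset equals the total storage size. A small worked example (the numbers are illustrative, not taken from the patch):

   // sizes                    = { 2, 3, 0, 1 }
   // after copy and padding   = { 2, 3, 0, 1, 0 }
   // after the exclusive scan = { 0, 2, 5, 5, 6 }   // the offsets array
   // getSegmentsCount( offsets )  == 4
   // getSegmentSize( offsets, 1 ) == offsets[ 2 ] - offsets[ 1 ] == 3
   // getStorageSize( offsets )    == offsets[ 4 ] == 6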
diff --git a/src/TNL/Containers/Segments/details/Ellpack.h b/src/TNL/Containers/Segments/details/Ellpack.h new file mode 100644 index 0000000000000000000000000000000000000000..ecfe63107325793717482b3710c9533a153c34c1 --- /dev/null +++ b/src/TNL/Containers/Segments/details/Ellpack.h @@ -0,0 +1,105 @@ +/*************************************************************************** + Ellpack.h - description + ------------------- + begin : Dec 3, 2019 + copyright : (C) 2019 by Tomas Oberhuber + email : tomas.oberhuber@fjfi.cvut.cz + ***************************************************************************/ + +/* See Copyright Notice in tnl/Copyright */ + +#pragma once + +#include <TNL/Containers/Vector.h> + +namespace TNL { + namespace Containers { + namespace Segments { + +template< typename Device, + typename Index, + bool RowMajorOrder = std::is_same< Device, Devices::Host >::value, + int Alignment = 32 > +class Ellpack +{ + public: + + using DeviceType = Device; + using IndexType = Index; + static constexpr int getAlignment() { return Alignment; } + static constexpr bool getRowMajorOrder() { return RowMajorOrder; } + using OffsetsHolder = Containers::Vector< IndexType, DeviceType, IndexType >; + using SegmentsSizes = OffsetsHolder; + + Ellpack(); + + Ellpack( const SegmentsSizes& sizes ); + + Ellpack( const IndexType segmentsCount, const IndexType segmentSize ); + + Ellpack( const Ellpack& segments ); + + Ellpack( const Ellpack&& segments ); + + /** + * \brief Set sizes of particular segments. + */ + template< typename SizesHolder = OffsetsHolder > + void setSegmentsSizes( const SizesHolder& sizes ); + + void setSegmentsSizes( const IndexType segmentsCount, const IndexType segmentSize ); + /** + * \brief Number of segments. + */ + __cuda_callable__ + IndexType getSegmentsCount() const; + + __cuda_callable__ + IndexType getSegmentSize( const IndexType segmentIdx ) const; + + __cuda_callable__ + IndexType getSize() const; + + __cuda_callable__ + IndexType getStorageSize() const; + + __cuda_callable__ + IndexType getGlobalIndex( const Index segmentIdx, const Index localIdx ) const; + + __cuda_callable__ + void getSegmentAndLocalIndex( const Index globalIdx, Index& segmentIdx, Index& localIdx ) const; + + /*** + * \brief Go over all segments and for each segment element call + * function 'f' with arguments 'args'. The return type of 'f' is bool. + * When it's true, the for-loop continues. Once 'f' returns false, the for-loop + * is terminated. + */ + template< typename Function, typename... Args > + void forSegments( IndexType first, IndexType last, Function& f, Args... args ) const; + + template< typename Function, typename... Args > + void forAll( Function& f, Args... args ) const; + + + /*** + * \brief Go over all segments and perform a reduction in each of them. + */ + template< typename Fetch, typename Reduction, typename ResultKeeper, typename Real, typename... Args > + void segmentsReduction( IndexType first, IndexType last, Fetch& fetch, Reduction& reduction, ResultKeeper& keeper, const Real& zero, Args... args ) const; + + template< typename Fetch, typename Reduction, typename ResultKeeper, typename Real, typename... Args > + void allReduction( Fetch& fetch, Reduction& reduction, ResultKeeper& keeper, const Real& zero, Args... args ) const; + + void save( File& file ) const; + + void load( File& file ); + + protected: + + IndexType segmentSize, size, alignedSize; +}; + + } // namespace Segments + } // namespace Containers +} // namespace TNL diff --git a/src/TNL/Containers/Segments/details/SlicedEllpack.h b/src/TNL/Containers/Segments/details/SlicedEllpack.h new file mode 100644 index 0000000000000000000000000000000000000000..6f185bc469e1c1826348b5662735d6a2992fc087 --- /dev/null +++ b/src/TNL/Containers/Segments/details/SlicedEllpack.h @@ -0,0 +1,104 @@ +/*************************************************************************** + SlicedEllpack.h - description + ------------------- + begin : Dec 4, 2019 + copyright : (C) 2019 by Tomas Oberhuber + email : tomas.oberhuber@fjfi.cvut.cz + ***************************************************************************/ + +/* See Copyright Notice in tnl/Copyright */ + +#pragma once + +#include <TNL/Containers/Vector.h> + +namespace TNL { + namespace Containers { + namespace Segments { + +template< typename Device, + typename Index, + bool RowMajorOrder = std::is_same< Device, Devices::Host >::value, + int SliceSize = 32 > +class SlicedEllpack +{ + public: + + using DeviceType = Device; + using IndexType = Index; + using OffsetsHolder = Containers::Vector< IndexType, DeviceType, IndexType >; + static constexpr int getSliceSize() { return SliceSize; } + static constexpr bool getRowMajorOrder() { return RowMajorOrder; } + + SlicedEllpack(); + + SlicedEllpack( const Vector< IndexType, DeviceType, IndexType >& sizes ); + + SlicedEllpack( const SlicedEllpack& segments ); + + SlicedEllpack( const SlicedEllpack&& segments ); + + /** + * \brief Set sizes of particular segments. + */ + template< typename SizesHolder = OffsetsHolder > + void setSegmentsSizes( const SizesHolder& sizes ); + + __cuda_callable__ + IndexType getSegmentsCount() const; + + __cuda_callable__ + IndexType getSegmentSize( const IndexType segmentIdx ) const; + + /** + * \brief Number of segments.
+ */ + __cuda_callable__ + IndexType getSize() const; + + + __cuda_callable__ + IndexType getStorageSize() const; + + __cuda_callable__ + IndexType getGlobalIndex( const Index segmentIdx, const Index localIdx ) const; + + __cuda_callable__ + void getSegmentAndLocalIndex( const Index globalIdx, Index& segmentIdx, Index& localIdx ) const; + + /*** + * \brief Go over all segments and for each segment element call + * function 'f' with arguments 'args'. The return type of 'f' is bool. + * When it's true, the for-loop continues. Once 'f' returns false, the for-loop + * is terminated. + */ + template< typename Function, typename... Args > + void forSegments( IndexType first, IndexType last, Function& f, Args... args ) const; + + template< typename Function, typename... Args > + void forAll( Function& f, Args... args ) const; + + + /*** + * \brief Go over all segments and perform a reduction in each of them. + */ + template< typename Fetch, typename Reduction, typename ResultKeeper, typename Real, typename... Args > + void segmentsReduction( IndexType first, IndexType last, Fetch& fetch, Reduction& reduction, ResultKeeper& keeper, const Real& zero, Args... args ) const; + + template< typename Fetch, typename Reduction, typename ResultKeeper, typename Real, typename... Args > + void allReduction( Fetch& fetch, Reduction& reduction, ResultKeeper& keeper, const Real& zero, Args... args ) const; + + void save( File& file ) const; + + void load( File& file ); + + protected: + + IndexType size, alignedSize, segmentsCount; + + OffsetsHolder sliceOffsets, sliceSegmentSizes; +}; + + } // namespace Segments + } // namespace Containers +} // namespace TNL diff --git a/src/TNL/Matrices/Dense.h b/src/TNL/Matrices/Dense.h index c469927234cd835bef7bcfe36599a47cb843b6cc..ada48ee0297438c717772433fb6a09972f2d49e8 100644 --- a/src/TNL/Matrices/Dense.h +++ b/src/TNL/Matrices/Dense.h @@ -10,214 +10,220 @@ #pragma once +#include <TNL/Allocators/Default.h> #include <TNL/Devices/Host.h> +#include <TNL/Matrices/DenseMatrixRowView.h> #include <TNL/Matrices/Matrix.h> -#include <TNL/Matrices/DenseRow.h> -#include <TNL/Containers/Array.h> +#include <TNL/Matrices/DenseMatrixView.h> +#include <TNL/Containers/Segments/Ellpack.h> namespace TNL { -namespace Matrices { +namespace Matrices { template< typename Device > class DenseDeviceDependentCode; template< typename Real = double, typename Device = Devices::Host, - typename Index = int > + typename Index = int, + bool RowMajorOrder = std::is_same< Device, Devices::Host >::value, + typename RealAllocator = typename Allocators::Default< Device >::template Allocator< Real > > class Dense : public Matrix< Real, Device, Index > { -private: - // convenient template alias for controlling the selection of copy-assignment operator - template< typename Device2 > - using Enabler = std::enable_if< !
std::is_same< Device2, Device >::value >; + public: + using RealType = Real; + using DeviceType = Device; + using IndexType = Index; + using RealAllocatorType = RealAllocator; + using BaseType = Matrix< Real, Device, Index, RealAllocator >; + using ValuesVectorType = typename BaseType::ValuesVectorType; + using ValuesViewType = typename ValuesVectorType::ViewType; + using SegmentsType = Containers::Segments::Ellpack< DeviceType, IndexType, typename Allocators::Default< Device >::template Allocator< IndexType >, RowMajorOrder, 1 >; + using SegmentViewType = typename SegmentsType::SegmentViewType; + using ViewType = DenseMatrixView< Real, Device, Index, RowMajorOrder >; + using ConstViewType = DenseMatrixView< typename std::add_const< Real >::type, Device, Index, RowMajorOrder >; + using RowView = DenseMatrixRowView< SegmentViewType, ValuesViewType >; - // friend class will be needed for templated assignment operators - template< typename Real2, typename Device2, typename Index2 > - friend class Dense; - -public: - typedef Real RealType; - typedef Device DeviceType; - typedef Index IndexType; - typedef typename Matrix< Real, Device, Index >::CompressedRowLengthsVector CompressedRowLengthsVector; - typedef typename Matrix< RealType, DeviceType, IndexType >::ConstCompressedRowLengthsVectorView ConstCompressedRowLengthsVectorView; - typedef Matrix< Real, Device, Index > BaseType; - typedef DenseRow< Real, Index > MatrixRow; + // TODO: remove this + using CompressedRowLengthsVector = typename Matrix< Real, Device, Index >::CompressedRowLengthsVector; + using ConstCompressedRowLengthsVectorView = typename Matrix< RealType, DeviceType, IndexType >::ConstCompressedRowLengthsVectorView; - template< typename _Real = Real, - typename _Device = Device, - typename _Index = Index > - using Self = Dense< _Real, _Device, _Index >; + template< typename _Real = Real, + typename _Device = Device, + typename _Index = Index > + using Self = Dense< _Real, _Device, _Index >; - Dense(); - - static String getSerializationType(); + Dense(); - virtual String getSerializationTypeVirtual() const; - - void setDimensions( const IndexType rows, - const IndexType columns ); - - template< typename Real2, typename Device2, typename Index2 > - void setLike( const Dense< Real2, Device2, Index2 >& matrix ); + Dense( const IndexType rows, const IndexType columns ); - /**** - * This method is only for the compatibility with the sparse matrices. - */ - void setCompressedRowLengths( ConstCompressedRowLengthsVectorView rowLengths ); - - /**** - * Returns maximal number of the nonzero matrix elements that can be stored - * in a given row. 
- */ - IndexType getRowLength( const IndexType row ) const; - - __cuda_callable__ - IndexType getRowLengthFast( const IndexType row ) const; - - IndexType getMaxRowLength() const; - - IndexType getNumberOfMatrixElements() const; - - IndexType getNumberOfNonzeroMatrixElements() const; - - void reset(); - - void setValue( const RealType& v ); - - __cuda_callable__ - Real& operator()( const IndexType row, - const IndexType column ); - - __cuda_callable__ - const Real& operator()( const IndexType row, - const IndexType column ) const; - - __cuda_callable__ - bool setElementFast( const IndexType row, - const IndexType column, - const RealType& value ); - - bool setElement( const IndexType row, - const IndexType column, - const RealType& value ); - - __cuda_callable__ - bool addElementFast( const IndexType row, - const IndexType column, - const RealType& value, - const RealType& thisElementMultiplicator = 1.0 ); + ViewType getView(); - bool addElement( const IndexType row, - const IndexType column, - const RealType& value, - const RealType& thisElementMultiplicator = 1.0 ); + ConstViewType getConstView() const; - __cuda_callable__ - bool setRowFast( const IndexType row, - const IndexType* columns, - const RealType* values, - const IndexType elements ); + static String getSerializationType(); - bool setRow( const IndexType row, - const IndexType* columns, - const RealType* values, - const IndexType elements ); + virtual String getSerializationTypeVirtual() const; - __cuda_callable__ - bool addRowFast( const IndexType row, - const IndexType* columns, - const RealType* values, - const IndexType elements, - const RealType& thisRowMultiplicator = 1.0 ); + void setDimensions( const IndexType rows, + const IndexType columns ); - bool addRow( const IndexType row, - const IndexType* columns, - const RealType* values, - const IndexType elements, - const RealType& thisRowMultiplicator = 1.0 ); + template< typename Matrix > + void setLike( const Matrix& matrix ); - __cuda_callable__ - const Real& getElementFast( const IndexType row, - const IndexType column ) const; + /**** + * This method is only for the compatibility with the sparse matrices. 
+ */ + void setCompressedRowLengths( ConstCompressedRowLengthsVectorView rowLengths ); - Real getElement( const IndexType row, - const IndexType column ) const; + template< typename Vector > + void getCompressedRowLengths( Vector& rowLengths ) const; - __cuda_callable__ - void getRowFast( const IndexType row, - IndexType* columns, - RealType* values ) const; + [[deprecated]] + IndexType getRowLength( const IndexType row ) const; - /*void getRow( const IndexType row, - IndexType* columns, - RealType* values ) const;*/ + IndexType getMaxRowLength() const; - __cuda_callable__ - MatrixRow getRow( const IndexType rowIndex ); + IndexType getNumberOfMatrixElements() const; - __cuda_callable__ - const MatrixRow getRow( const IndexType rowIndex ) const; + IndexType getNumberOfNonzeroMatrixElements() const; - template< typename Vector > - __cuda_callable__ - typename Vector::RealType rowVectorProduct( const IndexType row, - const Vector& vector ) const; + void reset(); - template< typename InVector, typename OutVector > - void vectorProduct( const InVector& inVector, - OutVector& outVector ) const; + __cuda_callable__ + const RowView getRow( const IndexType& rowIdx ) const; - template< typename Matrix > - void addMatrix( const Matrix& matrix, - const RealType& matrixMultiplicator = 1.0, - const RealType& thisMatrixMultiplicator = 1.0 ); + __cuda_callable__ + RowView getRow( const IndexType& rowIdx ); - template< typename Matrix1, typename Matrix2, int tileDim = 32 > - void getMatrixProduct( const Matrix1& matrix1, - const Matrix2& matrix2, - const RealType& matrix1Multiplicator = 1.0, - const RealType& matrix2Multiplicator = 1.0 ); - template< typename Matrix, int tileDim = 32 > - void getTransposition( const Matrix& matrix, - const RealType& matrixMultiplicator = 1.0 ); + void setValue( const RealType& v ); - template< typename Vector1, typename Vector2 > - void performSORIteration( const Vector1& b, - const IndexType row, - Vector2& x, - const RealType& omega = 1.0 ) const; + __cuda_callable__ + Real& operator()( const IndexType row, + const IndexType column ); - // copy assignment - Dense& operator=( const Dense& matrix ); - - // cross-device copy assignment - template< typename Real2, typename Device2, typename Index2, - typename = typename Enabler< Device2 >::type > - Dense& operator=( const Dense< Real2, Device2, Index2 >& matrix ); + __cuda_callable__ + const Real& operator()( const IndexType row, + const IndexType column ) const; - void save( const String& fileName ) const; - - void load( const String& fileName ); + void setElement( const IndexType row, + const IndexType column, + const RealType& value ); - void save( File& file ) const; - - void load( File& file ); + void addElement( const IndexType row, + const IndexType column, + const RealType& value, + const RealType& thisElementMultiplicator = 1.0 ); - void print( std::ostream& str ) const; + Real getElement( const IndexType row, + const IndexType column ) const; -protected: + template< typename Fetch, typename Reduce, typename Keep, typename FetchReal > + void rowsReduction( IndexType first, IndexType last, Fetch& fetch, Reduce& reduce, Keep& keep, const FetchReal& zero ) const; - __cuda_callable__ - IndexType getElementIndex( const IndexType row, - const IndexType column ) const; + template< typename Fetch, typename Reduce, typename Keep, typename FetchReal > + void allRowsReduction( Fetch& fetch, Reduce& reduce, Keep& keep, const FetchReal& zero ) const; + + template< typename Function > + void forRows( IndexType first, IndexType 
last, Function& function ) const; + + template< typename Function > + void forRows( IndexType first, IndexType last, Function& function ); + + template< typename Function > + void forAllRows( Function& function ) const; + + template< typename Function > + void forAllRows( Function& function ); + + template< typename Vector > + __cuda_callable__ + typename Vector::RealType rowVectorProduct( const IndexType row, + const Vector& vector ) const; + + template< typename InVector, typename OutVector > + void vectorProduct( const InVector& inVector, + OutVector& outVector ) const; + + template< typename Matrix > + void addMatrix( const Matrix& matrix, + const RealType& matrixMultiplicator = 1.0, + const RealType& thisMatrixMultiplicator = 1.0 ); + + template< typename Matrix1, typename Matrix2, int tileDim = 32 > + void getMatrixProduct( const Matrix1& matrix1, + const Matrix2& matrix2, + const RealType& matrix1Multiplicator = 1.0, + const RealType& matrix2Multiplicator = 1.0 ); + + template< typename Matrix, int tileDim = 32 > + void getTransposition( const Matrix& matrix, + const RealType& matrixMultiplicator = 1.0 ); + + template< typename Vector1, typename Vector2 > + void performSORIteration( const Vector1& b, + const IndexType row, + Vector2& x, + const RealType& omega = 1.0 ) const; + + /** + * \brief Assignment operator for exactly the same type of the dense matrix. + * + * @param matrix + * @return + */ + Dense& operator=( const Dense& matrix ); + + /** + * \brief Assignment operator for other dense matrices. + * + * @param matrix + * @return + */ + template< typename RHSReal, typename RHSDevice, typename RHSIndex, + bool RHSRowMajorOrder, typename RHSRealAllocator > + Dense& operator=( const Dense< RHSReal, RHSDevice, RHSIndex, RHSRowMajorOrder, RHSRealAllocator >& matrix ); + + /** + * \brief Assignment operator for other (sparse) types of matrices. 
+ * @param matrix + * @return + */ + template< typename RHSMatrix > + Dense& operator=( const RHSMatrix& matrix ); + + template< typename Real_, typename Device_, typename Index_, typename RealAllocator_ > + bool operator==( const Dense< Real_, Device_, Index_, RowMajorOrder >& matrix ) const; + + template< typename Real_, typename Device_, typename Index_, typename RealAllocator_ > + bool operator!=( const Dense< Real_, Device_, Index_, RowMajorOrder >& matrix ) const; + + void save( const String& fileName ) const; + + void load( const String& fileName ); + + void save( File& file ) const; + + void load( File& file ); + + void print( std::ostream& str ) const; + + protected: + + __cuda_callable__ + IndexType getElementIndex( const IndexType row, + const IndexType column ) const; + + typedef DenseDeviceDependentCode< DeviceType > DeviceDependentCode; + friend class DenseDeviceDependentCode< DeviceType >; + + SegmentsType segments; - typedef DenseDeviceDependentCode< DeviceType > DeviceDependentCode; - friend class DenseDeviceDependentCode< DeviceType >; + ViewType view; }; } // namespace Matrices } // namespace TNL -#include <TNL/Matrices/Dense_impl.h> +#include <TNL/Matrices/Dense.hpp> diff --git a/src/TNL/Matrices/Dense_impl.h b/src/TNL/Matrices/Dense.hpp similarity index 54% rename from src/TNL/Matrices/Dense_impl.h rename to src/TNL/Matrices/Dense.hpp index 246bd09edb459e6df9749af9d1589f508c2c5806..346c26ed8a628d5737e3dce143340a42364e5a9a 100644 --- a/src/TNL/Matrices/Dense_impl.h +++ b/src/TNL/Matrices/Dense.hpp @@ -1,5 +1,5 @@ /*************************************************************************** - Dense_impl.h - description + Dense.hpp - description ------------------- begin : Nov 29, 2013 copyright : (C) 2013 by Tomas Oberhuber @@ -15,412 +15,400 @@ #include <TNL/Exceptions/NotImplementedError.h> namespace TNL { -namespace Matrices { +namespace Matrices { template< typename Real, typename Device, - typename Index > -Dense< Real, Device, Index >::Dense() + typename Index, + bool RowMajorOrder, + typename RealAllocator > +Dense< Real, Device, Index, RowMajorOrder, RealAllocator >::Dense() { } template< typename Real, typename Device, - typename Index > -String Dense< Real, Device, Index >::getSerializationType() + typename Index, + bool RowMajorOrder, + typename RealAllocator > +Dense< Real, Device, Index, RowMajorOrder, RealAllocator >:: +Dense( const IndexType rows, const IndexType columns ) { - return String( "Matrices::Dense< " ) + - getType< RealType >() + ", " + - getType< Device >() + ", " + - getType< IndexType >() + " >"; + this->setDimensions( rows, columns ); } template< typename Real, typename Device, - typename Index > -String Dense< Real, Device, Index >::getSerializationTypeVirtual() const + typename Index, + bool RowMajorOrder, + typename RealAllocator > +auto +Dense< Real, Device, Index, RowMajorOrder, RealAllocator >:: +getView() -> ViewType { - return this->getSerializationType(); + return ViewType( this->getRows(), + this->getColumns(), + this->getValues().getView(), + this->segments.getView() ); } template< typename Real, typename Device, - typename Index > -void Dense< Real, Device, Index >::setDimensions( const IndexType rows, - const IndexType columns ) + typename Index, + bool RowMajorOrder, + typename RealAllocator > +auto +Dense< Real, Device, Index, RowMajorOrder, RealAllocator >:: +getConstView() const -> ConstViewType { - Matrix< Real, Device, Index >::setDimensions( rows, columns ); - this->values.setSize( rows * columns ); - 
this->values.setValue( 0.0 ); + return ConstViewType( this->getRows(), + this->getColumns(), + this->getValues().getConstView(), + this->segments.getConstView() ); } template< typename Real, typename Device, - typename Index > - template< typename Real2, - typename Device2, - typename Index2 > -void Dense< Real, Device, Index >::setLike( const Dense< Real2, Device2, Index2 >& matrix ) + typename Index, + bool RowMajorOrder, + typename RealAllocator > +String +Dense< Real, Device, Index, RowMajorOrder, RealAllocator >:: +getSerializationType() { - this->setDimensions( matrix.getRows(), matrix.getColumns() ); + return ViewType::getSerializationType(); } template< typename Real, typename Device, - typename Index > -void Dense< Real, Device, Index >::setCompressedRowLengths( ConstCompressedRowLengthsVectorView rowLengths ) + typename Index, + bool RowMajorOrder, + typename RealAllocator > +String +Dense< Real, Device, Index, RowMajorOrder, RealAllocator >:: +getSerializationTypeVirtual() const { + return this->getSerializationType(); } template< typename Real, typename Device, - typename Index > -Index Dense< Real, Device, Index >::getRowLength( const IndexType row ) const + typename Index, + bool RowMajorOrder, + typename RealAllocator > +void +Dense< Real, Device, Index, RowMajorOrder, RealAllocator >:: +setDimensions( const IndexType rows, + const IndexType columns ) { - return this->getColumns(); + Matrix< Real, Device, Index >::setDimensions( rows, columns ); + this->segments.setSegmentsSizes( rows, columns ); + this->values.setSize( rows * columns ); + this->values = 0.0; + this->view = this->getView(); } template< typename Real, typename Device, - typename Index > -__cuda_callable__ -Index Dense< Real, Device, Index >::getRowLengthFast( const IndexType row ) const + typename Index, + bool RowMajorOrder, + typename RealAllocator > + template< typename Matrix_ > +void +Dense< Real, Device, Index, RowMajorOrder, RealAllocator >:: +setLike( const Matrix_& matrix ) { - return this->getColumns(); + this->setDimensions( matrix.getRows(), matrix.getColumns() ); } template< typename Real, typename Device, - typename Index > -Index Dense< Real, Device, Index >::getMaxRowLength() const + typename Index, + bool RowMajorOrder, + typename RealAllocator > +void +Dense< Real, Device, Index, RowMajorOrder, RealAllocator >:: +setCompressedRowLengths( ConstCompressedRowLengthsVectorView rowLengths ) { - return this->getColumns(); + TNL_ASSERT_EQ( rowLengths.getSize(), this->getRows(), "" ); + TNL_ASSERT_LE( max( rowLengths ), this->getColumns(), "" ); } template< typename Real, typename Device, - typename Index > -Index Dense< Real, Device, Index >::getNumberOfMatrixElements() const + typename Index, + bool RowMajorOrder, + typename RealAllocator > + template< typename Vector > +void +Dense< Real, Device, Index, RowMajorOrder, RealAllocator >:: +getCompressedRowLengths( Vector& rowLengths ) const { - return this->getRows() * this->getColumns(); + this->view.getCompressedRowLengths( rowLengths ); } template< typename Real, typename Device, - typename Index > -Index Dense< Real, Device, Index >::getNumberOfNonzeroMatrixElements() const + typename Index, + bool RowMajorOrder, + typename RealAllocator > +Index Dense< Real, Device, Index, RowMajorOrder, RealAllocator >::getRowLength( const IndexType row ) const { - IndexType nonzeroElements( 0 ); - for( IndexType row = 0; row < this->getRows(); row++ ) - for( IndexType column = 0; column < this->getColumns(); column++ ) - if( this->getElement( row, column ) != 
0 ) - nonzeroElements++; - return nonzeroElements; + return this->getColumns(); } template< typename Real, typename Device, - typename Index > -void Dense< Real, Device, Index >::reset() + typename Index, + bool RowMajorOrder, + typename RealAllocator > +Index Dense< Real, Device, Index, RowMajorOrder, RealAllocator >::getMaxRowLength() const { - Matrix< Real, Device, Index >::reset(); - this->values.reset(); + return this->getColumns(); } template< typename Real, typename Device, - typename Index > -void Dense< Real, Device, Index >::setValue( const Real& value ) + typename Index, + bool RowMajorOrder, + typename RealAllocator > +Index Dense< Real, Device, Index, RowMajorOrder, RealAllocator >::getNumberOfMatrixElements() const { - this->values.setValue( value ); + return this->getRows() * this->getColumns(); } - template< typename Real, typename Device, - typename Index > -__cuda_callable__ -Real& Dense< Real, Device, Index >::operator()( const IndexType row, - const IndexType column ) + typename Index, + bool RowMajorOrder, + typename RealAllocator > +Index Dense< Real, Device, Index, RowMajorOrder, RealAllocator >::getNumberOfNonzeroMatrixElements() const { - TNL_ASSERT_GE( row, 0, "Row index must be non-negative." ); - TNL_ASSERT_LT( row, this->getRows(), "Row index is out of bounds." ); - TNL_ASSERT_GE( column, 0, "Column index must be non-negative." ); - TNL_ASSERT_LT( column, this->getColumns(), "Column index is out of bounds." ); - - return this->values.operator[]( this->getElementIndex( row, column ) ); + return this->view.getNumberOfNonzeroMatrixElements(); } template< typename Real, typename Device, - typename Index > -__cuda_callable__ -const Real& Dense< Real, Device, Index >::operator()( const IndexType row, - const IndexType column ) const + typename Index, + bool RowMajorOrder, + typename RealAllocator > +void Dense< Real, Device, Index, RowMajorOrder, RealAllocator >::reset() { - TNL_ASSERT_GE( row, 0, "Row index must be non-negative." ); - TNL_ASSERT_LT( row, this->getRows(), "Row index is out of bounds." ); - TNL_ASSERT_GE( column, 0, "Column index must be non-negative." ); - TNL_ASSERT_LT( column, this->getColumns(), "Column index is out of bounds." ); - - return this->values.operator[]( this->getElementIndex( row, column ) ); + Matrix< Real, Device, Index >::reset(); } - template< typename Real, typename Device, - typename Index > -__cuda_callable__ -bool Dense< Real, Device, Index >::setElementFast( const IndexType row, - const IndexType column, - const RealType& value ) + typename Index, + bool RowMajorOrder, + typename RealAllocator > +void Dense< Real, Device, Index, RowMajorOrder, RealAllocator >::setValue( const Real& value ) { - TNL_ASSERT_GE( row, 0, "Row index must be non-negative." ); - TNL_ASSERT_LT( row, this->getRows(), "Row index is out of bounds." ); - TNL_ASSERT_GE( column, 0, "Column index must be non-negative." ); - TNL_ASSERT_LT( column, this->getColumns(), "Column index is out of bounds." 
); - - this->values.operator[]( this->getElementIndex( row, column ) ) = value; - return true; + this->view.setValue( value ); } template< typename Real, typename Device, - typename Index > -bool Dense< Real, Device, Index >::setElement( const IndexType row, - const IndexType column, - const RealType& value ) + typename Index, + bool RowMajorOrder, + typename RealAllocator > +__cuda_callable__ auto +Dense< Real, Device, Index, RowMajorOrder, RealAllocator >:: +getRow( const IndexType& rowIdx ) const -> const RowView { - this->values.setElement( this->getElementIndex( row, column ), value ); - return true; + return this->view.getRow( rowIdx ); } - template< typename Real, typename Device, - typename Index > -__cuda_callable__ -bool Dense< Real, Device, Index >::addElementFast( const IndexType row, - const IndexType column, - const RealType& value, - const RealType& thisElementMultiplicator ) + typename Index, + bool RowMajorOrder, + typename RealAllocator > +__cuda_callable__ auto +Dense< Real, Device, Index, RowMajorOrder, RealAllocator >:: +getRow( const IndexType& rowIdx ) -> RowView { - TNL_ASSERT_GE( row, 0, "Row index must be non-negative." ); - TNL_ASSERT_LT( row, this->getRows(), "Row index is out of bounds." ); - TNL_ASSERT_GE( column, 0, "Column index must be non-negative." ); - TNL_ASSERT_LT( column, this->getColumns(), "Column index is out of bounds." ); - - const IndexType elementIndex = this->getElementIndex( row, column ); - if( thisElementMultiplicator == 1.0 ) - this->values.operator[]( elementIndex ) += value; - else - this->values.operator[]( elementIndex ) = - thisElementMultiplicator * this->values.operator[]( elementIndex ) + value; - return true; + return this->view.getRow( rowIdx ); } template< typename Real, typename Device, - typename Index > -bool Dense< Real, Device, Index >::addElement( const IndexType row, - const IndexType column, - const RealType& value, - const RealType& thisElementMultiplicator ) + typename Index, + bool RowMajorOrder, + typename RealAllocator > +__cuda_callable__ +Real& Dense< Real, Device, Index, RowMajorOrder, RealAllocator >::operator()( const IndexType row, + const IndexType column ) { - const IndexType elementIndex = this->getElementIndex( row, column ); - if( thisElementMultiplicator == 1.0 ) - this->values.setElement( elementIndex, - this->values.getElement( elementIndex ) + value ); - else - this->values.setElement( elementIndex, - thisElementMultiplicator * this->values.getElement( elementIndex ) + value ); - return true; + return this->view.operator()( row, column ); } - template< typename Real, typename Device, - typename Index > + typename Index, + bool RowMajorOrder, + typename RealAllocator > __cuda_callable__ -bool Dense< Real, Device, Index >::setRowFast( const IndexType row, - const IndexType* columns, - const RealType* values, - const IndexType elements ) +const Real& Dense< Real, Device, Index, RowMajorOrder, RealAllocator >::operator()( const IndexType row, + const IndexType column ) const { - TNL_ASSERT( elements <= this->getColumns(), - std::cerr << " elements = " << elements - << " this->columns = " << this->getColumns() ); - for( IndexType i = 0; i < elements; i++ ) - this->setElementFast( row, columns[ i ], values[ i ] ); - return true; + return this->view.operator()( row, column ); } template< typename Real, typename Device, - typename Index > -bool Dense< Real, Device, Index >::setRow( const IndexType row, - const IndexType* columns, - const RealType* values, - const IndexType elements ) + typename Index, + bool 
RowMajorOrder, + typename RealAllocator > +void +Dense< Real, Device, Index, RowMajorOrder, RealAllocator >:: +setElement( const IndexType row, + const IndexType column, + const RealType& value ) { - TNL_ASSERT( elements <= this->getColumns(), - std::cerr << " elements = " << elements - << " this->columns = " << this->getColumns() ); - for( IndexType i = 0; i < elements; i++ ) - this->setElement( row, columns[ i ], values[ i ] ); - return true; + this->view.setElement( row, column, value ); } template< typename Real, typename Device, - typename Index > -__cuda_callable__ -bool Dense< Real, Device, Index >::addRowFast( const IndexType row, - const IndexType* columns, - const RealType* values, - const IndexType elements, - const RealType& thisRowMultiplicator ) + typename Index, + bool RowMajorOrder, + typename RealAllocator > +void +Dense< Real, Device, Index, RowMajorOrder, RealAllocator >:: +addElement( const IndexType row, + const IndexType column, + const RealType& value, + const RealType& thisElementMultiplicator ) { - TNL_ASSERT( elements <= this->columns, - std::cerr << " elements = " << elements - << " this->columns = " << this->columns ); - for( IndexType i = 0; i < elements; i++ ) - this->setElementFast( row, columns[ i ], - thisRowMultiplicator * this->getElementFast( row, columns[ i ] ) + values[ i ] ); - return true; + this->view.addElement( row, column, value, thisElementMultiplicator ); } template< typename Real, typename Device, - typename Index > -bool Dense< Real, Device, Index >::addRow( const IndexType row, - const IndexType* columns, - const RealType* values, - const IndexType elements, - const RealType& thisRowMultiplicator ) + typename Index, + bool RowMajorOrder, + typename RealAllocator > +Real +Dense< Real, Device, Index, RowMajorOrder, RealAllocator >:: +getElement( const IndexType row, + const IndexType column ) const { - TNL_ASSERT( elements <= this->columns, - std::cerr << " elements = " << elements - << " this->columns = " << this->columns ); - for( IndexType i = 0; i < elements; i++ ) - this->setElement( row, columns[ i ], - thisRowMultiplicator * this->getElement( row, columns[ i ] ) + values[ i ] ); - return true; + return this->view.getElement( row, column ); } - template< typename Real, typename Device, - typename Index > -__cuda_callable__ -const Real& Dense< Real, Device, Index >::getElementFast( const IndexType row, - const IndexType column ) const + typename Index, + bool RowMajorOrder, + typename RealAllocator > + template< typename Fetch, typename Reduce, typename Keep, typename FetchValue > +void +Dense< Real, Device, Index, RowMajorOrder, RealAllocator >:: +rowsReduction( IndexType first, IndexType last, Fetch& fetch, Reduce& reduce, Keep& keep, const FetchValue& zero ) const { - TNL_ASSERT_GE( row, 0, "Row index must be non-negative." ); - TNL_ASSERT_LT( row, this->getRows(), "Row index is out of bounds." ); - TNL_ASSERT_GE( column, 0, "Column index must be non-negative." ); - TNL_ASSERT_LT( column, this->getColumns(), "Column index is out of bounds." 
); + this->view.rowsReduction( first, last, fetch, reduce, keep, zero ); +} - return this->values.operator[]( this->getElementIndex( row, column ) ); +template< typename Real, + typename Device, + typename Index, + bool RowMajorOrder, + typename RealAllocator > + template< typename Fetch, typename Reduce, typename Keep, typename FetchReal > +void +Dense< Real, Device, Index, RowMajorOrder, RealAllocator >:: +allRowsReduction( Fetch& fetch, Reduce& reduce, Keep& keep, const FetchReal& zero ) const +{ + this->rowsReduction( 0, this->getRows(), fetch, reduce, keep, zero ); } template< typename Real, typename Device, - typename Index > -Real Dense< Real, Device, Index >::getElement( const IndexType row, - const IndexType column ) const + typename Index, + bool RowMajorOrder, + typename RealAllocator > + template< typename Function > +void +Dense< Real, Device, Index, RowMajorOrder, RealAllocator >:: +forRows( IndexType first, IndexType last, Function& function ) const { - return this->values.getElement( this->getElementIndex( row, column ) ); + this->view.forRows( first, last, function ); } template< typename Real, typename Device, - typename Index > -__cuda_callable__ -void Dense< Real, Device, Index >::getRowFast( const IndexType row, - IndexType* columns, - RealType* values ) const + typename Index, + bool RowMajorOrder, + typename RealAllocator > + template< typename Function > +void +Dense< Real, Device, Index, RowMajorOrder, RealAllocator >:: +forRows( IndexType first, IndexType last, Function& function ) { - for( IndexType i = 0; i < this->getColumns(); i++ ) - { - columns[ i ] = i; - values[ i ] = this->getElementFast( row, i ); - } + this->view.forRows( first, last, function ); } template< typename Real, typename Device, - typename Index > -__cuda_callable__ -typename Dense< Real, Device, Index >::MatrixRow -Dense< Real, Device, Index >:: -getRow( const IndexType rowIndex ) + typename Index, + bool RowMajorOrder, + typename RealAllocator > + template< typename Function > +void +Dense< Real, Device, Index, RowMajorOrder, RealAllocator >:: +forAllRows( Function& function ) const { - if( std::is_same< Device, Devices::Host >::value ) - return MatrixRow( &this->values.getData()[ this->getElementIndex( rowIndex, 0 ) ], - this->columns, - 1 ); - if( std::is_same< Device, Devices::Cuda >::value ) - return MatrixRow( &this->values.getData()[ this->getElementIndex( rowIndex, 0 ) ], - this->columns, - this->rows ); + this->forRows( 0, this->getRows(), function ); } template< typename Real, typename Device, - typename Index > -__cuda_callable__ -const typename Dense< Real, Device, Index >::MatrixRow -Dense< Real, Device, Index >:: -getRow( const IndexType rowIndex ) const + typename Index, + bool RowMajorOrder, + typename RealAllocator > + template< typename Function > +void +Dense< Real, Device, Index, RowMajorOrder, RealAllocator >:: +forAllRows( Function& function ) { - if( std::is_same< Device, Devices::Host >::value ) - return MatrixRow( &this->values.getData()[ this->getElementIndex( rowIndex, 0 ) ], - this->columns, - 1 ); - if( std::is_same< Device, Devices::Cuda >::value ) - return MatrixRow( &this->values.getData()[ this->getElementIndex( rowIndex, 0 ) ], - this->columns, - this->rows ); + this->forRows( 0, this->getRows(), function ); } template< typename Real, typename Device, - typename Index > + typename Index, + bool RowMajorOrder, + typename RealAllocator > template< typename Vector > __cuda_callable__ -typename Vector::RealType Dense< Real, Device, Index >::rowVectorProduct( 
const IndexType row, +typename Vector::RealType Dense< Real, Device, Index, RowMajorOrder, RealAllocator >::rowVectorProduct( const IndexType row, const Vector& vector ) const { - RealType sum( 0.0 ); - for( IndexType column = 0; column < this->getColumns(); column++ ) - sum += this->getElementFast( row, column ) * vector[ column ]; - return sum; + return this->view.rowVectorProduct( row, vector ); } template< typename Real, typename Device, - typename Index > + typename Index, + bool RowMajorOrder, + typename RealAllocator > template< typename InVector, typename OutVector > -void Dense< Real, Device, Index >::vectorProduct( const InVector& inVector, - OutVector& outVector ) const +void +Dense< Real, Device, Index, RowMajorOrder, RealAllocator >:: +vectorProduct( const InVector& inVector, OutVector& outVector ) const { - TNL_ASSERT( this->getColumns() == inVector.getSize(), - std::cerr << "Matrix columns: " << this->getColumns() << std::endl - << "Vector size: " << inVector.getSize() << std::endl ); - TNL_ASSERT( this->getRows() == outVector.getSize(), - std::cerr << "Matrix rows: " << this->getRows() << std::endl - << "Vector size: " << outVector.getSize() << std::endl ); - - DeviceDependentCode::vectorProduct( *this, inVector, outVector ); + this->view.vectorProduct( inVector, outVector ); } template< typename Real, typename Device, - typename Index > + typename Index, + bool RowMajorOrder, + typename RealAllocator > template< typename Matrix > -void Dense< Real, Device, Index >::addMatrix( const Matrix& matrix, +void Dense< Real, Device, Index, RowMajorOrder, RealAllocator >::addMatrix( const Matrix& matrix, const RealType& matrixMultiplicator, const RealType& thisMatrixMultiplicator ) { @@ -440,6 +428,8 @@ void Dense< Real, Device, Index >::addMatrix( const Matrix& matrix, #ifdef HAVE_CUDA template< typename Real, typename Index, + bool RowMajorOrder, + typename RealAllocator, typename Matrix1, typename Matrix2, int tileDim, @@ -538,9 +528,11 @@ __global__ void DenseMatrixProductKernel( Dense< Real, Devices::Cuda, Index >* r template< typename Real, typename Device, - typename Index > + typename Index, + bool RowMajorOrder, + typename RealAllocator > template< typename Matrix1, typename Matrix2, int tileDim > -void Dense< Real, Device, Index >::getMatrixProduct( const Matrix1& matrix1, +void Dense< Real, Device, Index, RowMajorOrder, RealAllocator >::getMatrixProduct( const Matrix1& matrix1, const Matrix2& matrix2, const RealType& matrix1Multiplicator, const RealType& matrix2Multiplicator ) @@ -628,6 +620,8 @@ void Dense< Real, Device, Index >::getMatrixProduct( const Matrix1& matrix1, template< typename Real, typename Index, typename Matrix, + bool RowMajorOrder, + typename RealAllocator, int tileDim, int tileRowBlockSize > __global__ void DenseTranspositionAlignedKernel( Dense< Real, Devices::Cuda, Index >* resultMatrix, @@ -696,6 +690,8 @@ __global__ void DenseTranspositionAlignedKernel( Dense< Real, Devices::Cuda, Ind template< typename Real, typename Index, + bool RowMajorOrder, + typename RealAllocator, typename Matrix, int tileDim, int tileRowBlockSize > @@ -776,9 +772,11 @@ __global__ void DenseTranspositionNonAlignedKernel( Dense< Real, Devices::Cuda, template< typename Real, typename Device, - typename Index > + typename Index, + bool RowMajorOrder, + typename RealAllocator > template< typename Matrix, int tileDim > -void Dense< Real, Device, Index >::getTransposition( const Matrix& matrix, +void Dense< Real, Device, Index, RowMajorOrder, RealAllocator >::getTransposition( 
const Matrix& matrix, const RealType& matrixMultiplicator ) { TNL_ASSERT( this->getColumns() == matrix.getRows() && @@ -787,7 +785,7 @@ void Dense< Real, Device, Index >::getTransposition( const Matrix& matrix, << "This matrix rows: " << this->getRows() << std::endl << "That matrix columns: " << matrix.getColumns() << std::endl << "That matrix rows: " << matrix.getRows() << std::endl ); - + if( std::is_same< Device, Devices::Host >::value ) { const IndexType& rows = matrix.getRows(); @@ -867,9 +865,11 @@ void Dense< Real, Device, Index >::getTransposition( const Matrix& matrix, template< typename Real, typename Device, - typename Index > + typename Index, + bool RowMajorOrder, + typename RealAllocator > template< typename Vector1, typename Vector2 > -void Dense< Real, Device, Index >::performSORIteration( const Vector1& b, +void Dense< Real, Device, Index, RowMajorOrder, RealAllocator >::performSORIteration( const Vector1& b, const IndexType row, Vector2& x, const RealType& omega ) const @@ -878,148 +878,282 @@ void Dense< Real, Device, Index >::performSORIteration( const Vector1& b, for( IndexType i = 0; i < this->getColumns(); i++ ) { if( i == row ) - diagonalValue = this->getElementFast( row, row ); + diagonalValue = this->getElement( row, row ); else - sum += this->getElementFast( row, i ) * x[ i ]; + sum += this->getElement( row, i ) * x[ i ]; } x[ row ] = ( 1.0 - omega ) * x[ row ] + omega / diagonalValue * ( b[ row ] - sum ); } - -// copy assignment template< typename Real, typename Device, - typename Index > -Dense< Real, Device, Index >& -Dense< Real, Device, Index >::operator=( const Dense& matrix ) + typename Index, + bool RowMajorOrder, + typename RealAllocator > +Dense< Real, Device, Index, RowMajorOrder, RealAllocator >& +Dense< Real, Device, Index, RowMajorOrder, RealAllocator >:: +operator=( const Dense< Real, Device, Index, RowMajorOrder, RealAllocator >& matrix ) { - this->setLike( matrix ); + setLike( matrix ); this->values = matrix.values; return *this; } -// cross-device copy assignment template< typename Real, typename Device, - typename Index > - template< typename Real2, typename Device2, typename Index2, typename > -Dense< Real, Device, Index >& -Dense< Real, Device, Index >::operator=( const Dense< Real2, Device2, Index2 >& matrix ) + typename Index, + bool RowMajorOrder, + typename RealAllocator > + template< typename RHSReal, typename RHSDevice, typename RHSIndex, + bool RHSRowMajorOrder, typename RHSRealAllocator > +Dense< Real, Device, Index, RowMajorOrder, RealAllocator >& +Dense< Real, Device, Index, RowMajorOrder, RealAllocator >:: +operator=( const Dense< RHSReal, RHSDevice, RHSIndex, RHSRowMajorOrder, RHSRealAllocator >& matrix ) { - static_assert( std::is_same< Device, Devices::Host >::value || std::is_same< Device, Devices::Cuda >::value, - "unknown device" ); - static_assert( std::is_same< Device2, Devices::Host >::value || std::is_same< Device2, Devices::Cuda >::value, - "unknown device" ); + using RHSMatrix = Dense< RHSReal, RHSDevice, RHSIndex, RHSRowMajorOrder, RHSRealAllocator >; + using RHSIndexType = typename RHSMatrix::IndexType; + using RHSRealType = typename RHSMatrix::RealType; + using RHSDeviceType = typename RHSMatrix::DeviceType; this->setLike( matrix ); + if( RowMajorOrder == RHSRowMajorOrder ) + { + this->values = matrix.getValues(); + return *this; + } - throw Exceptions::NotImplementedError("Cross-device assignment for the Dense format is not implemented yet."); + auto this_view = this->view; + if( std::is_same< DeviceType, 
RHSDeviceType >::value ) + { + auto f = [=] __cuda_callable__ ( RHSIndexType rowIdx, RHSIndexType localIdx, RHSIndexType columnIdx, const RHSRealType& value, bool& compute ) mutable { + this_view( rowIdx, columnIdx ) = value; + }; + matrix.forAllRows( f ); + } + else + { + const IndexType maxRowLength = matrix.getColumns(); + const IndexType bufferRowsCount( 128 ); + const size_t bufferSize = bufferRowsCount * maxRowLength; + Containers::Vector< RHSRealType, RHSDeviceType, RHSIndexType > matrixValuesBuffer( bufferSize ); + Containers::Vector< RealType, DeviceType, IndexType > thisValuesBuffer( bufferSize ); + auto matrixValuesBuffer_view = matrixValuesBuffer.getView(); + auto thisValuesBuffer_view = thisValuesBuffer.getView(); + + IndexType baseRow( 0 ); + const IndexType rowsCount = this->getRows(); + while( baseRow < rowsCount ) + { + const IndexType lastRow = min( baseRow + bufferRowsCount, rowsCount ); + + //// + // Copy matrix elements into buffer + auto f1 = [=] __cuda_callable__ ( RHSIndexType rowIdx, RHSIndexType localIdx, RHSIndexType columnIdx, const RHSRealType& value, bool& compute ) mutable { + const IndexType bufferIdx = ( rowIdx - baseRow ) * maxRowLength + columnIdx; + matrixValuesBuffer_view[ bufferIdx ] = value; + }; + matrix.forRows( baseRow, lastRow, f1 ); + + //// + // Copy the source matrix buffer to this matrix buffer + thisValuesBuffer_view = matrixValuesBuffer_view; + + //// + // Copy matrix elements from the buffer to the matrix. + auto this_view = this->view; + auto f2 = [=] __cuda_callable__ ( IndexType columnIdx, IndexType bufferRowIdx ) mutable { + IndexType bufferIdx = bufferRowIdx * maxRowLength + columnIdx; + this_view( baseRow + bufferRowIdx, columnIdx ) = thisValuesBuffer_view[ bufferIdx ]; + }; + Algorithms::ParallelFor2D< DeviceType >::exec( ( IndexType ) 0, ( IndexType ) 0, ( IndexType ) maxRowLength, ( IndexType ) min( bufferRowsCount, this->getRows() - baseRow ), f2 ); + baseRow += bufferRowsCount; + } + } + return *this; } - template< typename Real, typename Device, - typename Index > -void Dense< Real, Device, Index >::save( const String& fileName ) const + typename Index, + bool RowMajorOrder, + typename RealAllocator > + template< typename RHSMatrix > +Dense< Real, Device, Index, RowMajorOrder, RealAllocator >& +Dense< Real, Device, Index, RowMajorOrder, RealAllocator >:: +operator=( const RHSMatrix& matrix ) { - Object::save( fileName ); + using RHSIndexType = typename RHSMatrix::IndexType; + using RHSRealType = typename RHSMatrix::RealType; + using RHSDeviceType = typename RHSMatrix::DeviceType; + using RHSRealAllocatorType = typename RHSMatrix::RealAllocatorType; + + Containers::Vector< RHSIndexType, RHSDeviceType, RHSIndexType > rowLengths; + matrix.getCompressedRowLengths( rowLengths ); + this->setDimensions( matrix.getRows(), matrix.getColumns() ); + + // TODO: use getConstView when it works + const auto matrixView = const_cast< RHSMatrix& >( matrix ).getView(); + auto values_view = this->values.getView(); + RHSIndexType padding_index = matrix.getPaddingIndex(); + this->values = 0.0; + + if( std::is_same< DeviceType, RHSDeviceType >::value ) + { + const auto segments_view = this->segments.getView(); + auto f = [=] __cuda_callable__ ( RHSIndexType rowIdx, RHSIndexType localIdx_, RHSIndexType columnIdx, const RHSRealType& value, bool& compute ) mutable { + if( value != 0.0 && columnIdx != padding_index ) + values_view[ segments_view.getGlobalIndex( rowIdx, columnIdx ) ] = value; + }; + matrix.forAllRows( f ); + } + else + { + const IndexType 
maxRowLength = max( rowLengths ); + const IndexType bufferRowsCount( 128 ); + const size_t bufferSize = bufferRowsCount * maxRowLength; + Containers::Vector< RHSRealType, RHSDeviceType, RHSIndexType, RHSRealAllocatorType > matrixValuesBuffer( bufferSize ); + Containers::Vector< RHSIndexType, RHSDeviceType, RHSIndexType > matrixColumnsBuffer( bufferSize ); + Containers::Vector< RealType, DeviceType, IndexType, RealAllocatorType > thisValuesBuffer( bufferSize ); + Containers::Vector< IndexType, DeviceType, IndexType > thisColumnsBuffer( bufferSize ); + auto matrixValuesBuffer_view = matrixValuesBuffer.getView(); + auto matrixColumnsBuffer_view = matrixColumnsBuffer.getView(); + auto thisValuesBuffer_view = thisValuesBuffer.getView(); + auto thisColumnsBuffer_view = thisColumnsBuffer.getView(); + + IndexType baseRow( 0 ); + const IndexType rowsCount = this->getRows(); + while( baseRow < rowsCount ) + { + const IndexType lastRow = min( baseRow + bufferRowsCount, rowsCount ); + thisColumnsBuffer = padding_index; + matrixColumnsBuffer_view = padding_index; + + //// + // Copy matrix elements into buffer + auto f1 = [=] __cuda_callable__ ( RHSIndexType rowIdx, RHSIndexType localIdx, RHSIndexType columnIndex, const RHSRealType& value, bool& compute ) mutable { + if( columnIndex != padding_index ) + { + const IndexType bufferIdx = ( rowIdx - baseRow ) * maxRowLength + localIdx; + matrixColumnsBuffer_view[ bufferIdx ] = columnIndex; + matrixValuesBuffer_view[ bufferIdx ] = value; + } + }; + matrix.forRows( baseRow, lastRow, f1 ); + + //// + // Copy the source matrix buffer to this matrix buffer + thisValuesBuffer_view = matrixValuesBuffer_view; + thisColumnsBuffer_view = matrixColumnsBuffer_view; + + //// + // Copy matrix elements from the buffer to the matrix + auto this_view = this->view; + auto f2 = [=] __cuda_callable__ ( IndexType bufferColumnIdx, IndexType bufferRowIdx ) mutable { + IndexType bufferIdx = bufferRowIdx * maxRowLength + bufferColumnIdx; + IndexType columnIdx = thisColumnsBuffer_view[ bufferIdx ]; + if( columnIdx != padding_index ) + this_view( baseRow + bufferRowIdx, columnIdx ) = thisValuesBuffer_view[ bufferIdx ]; + }; + Algorithms::ParallelFor2D< DeviceType >::exec( ( IndexType ) 0, ( IndexType ) 0, ( IndexType ) maxRowLength, ( IndexType ) min( bufferRowsCount, this->getRows() - baseRow ), f2 ); + baseRow += bufferRowsCount; + } + } + this->view = this->getView(); + return *this; } template< typename Real, typename Device, - typename Index > -void Dense< Real, Device, Index >::load( const String& fileName ) + typename Index, + bool RowMajorOrder, + typename RealAllocator > + template< typename Real_, typename Device_, typename Index_, typename RealAllocator_ > +bool +Dense< Real, Device, Index, RowMajorOrder, RealAllocator >:: +operator==( const Dense< Real_, Device_, Index_, RowMajorOrder >& matrix ) const { - Object::load( fileName ); + return( this->getRows() == matrix.getRows() && + this->getColumns() == matrix.getColumns() && + this->getValues() == matrix.getValues() ); } template< typename Real, typename Device, - typename Index > -void Dense< Real, Device, Index >::save( File& file ) const + typename Index, + bool RowMajorOrder, + typename RealAllocator > + template< typename Real_, typename Device_, typename Index_, typename RealAllocator_ > +bool +Dense< Real, Device, Index, RowMajorOrder, RealAllocator >:: +operator!=( const Dense< Real_, Device_, Index_, RowMajorOrder >& matrix ) const { - Matrix< Real, Device, Index >::save( file ); + return ! 
( *this == matrix ); } template< typename Real, typename Device, - typename Index > -void Dense< Real, Device, Index >::load( File& file ) + typename Index, + bool RowMajorOrder, + typename RealAllocator > +void Dense< Real, Device, Index, RowMajorOrder, RealAllocator >::save( const String& fileName ) const { - Matrix< Real, Device, Index >::load( file ); + this->view.save( fileName ); } template< typename Real, typename Device, - typename Index > -void Dense< Real, Device, Index >::print( std::ostream& str ) const + typename Index, + bool RowMajorOrder, + typename RealAllocator > +void Dense< Real, Device, Index, RowMajorOrder, RealAllocator >::load( const String& fileName ) { - for( IndexType row = 0; row < this->getRows(); row++ ) - { - str <<"Row: " << row << " -> "; - for( IndexType column = 0; column < this->getColumns(); column++ ) - str << " Col:" << column << "->" << this->getElement( row, column ) << "\t"; - str << std::endl; - } + Object::load( fileName ); } template< typename Real, typename Device, - typename Index > -__cuda_callable__ -Index Dense< Real, Device, Index >::getElementIndex( const IndexType row, - const IndexType column ) const + typename Index, + bool RowMajorOrder, + typename RealAllocator > +void Dense< Real, Device, Index, RowMajorOrder, RealAllocator >::save( File& file ) const { - TNL_ASSERT( ( std::is_same< Device, Devices::Host >::value || - std::is_same< Device, Devices::Cuda >::value ), ) - if( std::is_same< Device, Devices::Host >::value ) - return row * this->columns + column; - if( std::is_same< Device, Devices::Cuda >::value ) - return column * this->rows + row; - return -1; + this->view.save( file ); } -template<> -class DenseDeviceDependentCode< Devices::Host > +template< typename Real, + typename Device, + typename Index, + bool RowMajorOrder, + typename RealAllocator > +void Dense< Real, Device, Index, RowMajorOrder, RealAllocator >::load( File& file ) { - public: - - typedef Devices::Host Device; - - template< typename Real, - typename Index, - typename InVector, - typename OutVector > - static void vectorProduct( const Dense< Real, Device, Index >& matrix, - const InVector& inVector, - OutVector& outVector ) - { -#ifdef HAVE_OPENMP -#pragma omp parallel for if( Devices::Host::isOMPEnabled() ) -#endif - for( Index row = 0; row < matrix.getRows(); row ++ ) - outVector[ row ] = matrix.rowVectorProduct( row, inVector ); - } -}; + Matrix< Real, Device, Index >::load( file ); + this->segments.load( file ); + this->view = this->getView(); +} -template<> -class DenseDeviceDependentCode< Devices::Cuda > +template< typename Real, + typename Device, + typename Index, + bool RowMajorOrder, + typename RealAllocator > +void Dense< Real, Device, Index, RowMajorOrder, RealAllocator >::print( std::ostream& str ) const { - public: - - typedef Devices::Cuda Device; + this->view.print( str ); +} - template< typename Real, - typename Index, - typename InVector, - typename OutVector > - static void vectorProduct( const Dense< Real, Device, Index >& matrix, - const InVector& inVector, - OutVector& outVector ) - { - MatrixVectorProductCuda( matrix, inVector, outVector ); - } -}; +template< typename Real, + typename Device, + typename Index, + bool RowMajorOrder, + typename RealAllocator > +__cuda_callable__ +Index +Dense< Real, Device, Index, RowMajorOrder, RealAllocator >:: +getElementIndex( const IndexType row, const IndexType column ) const +{ + return this->segments.getGlobalIndex( row, column ); +} } // namespace Matrices } // namespace TNL diff --git 
a/src/TNL/Matrices/DenseMatrixRowView.h b/src/TNL/Matrices/DenseMatrixRowView.h new file mode 100644 index 0000000000000000000000000000000000000000..84c6b141cd7f7cdf25be8e550e573680b4cce902 --- /dev/null +++ b/src/TNL/Matrices/DenseMatrixRowView.h @@ -0,0 +1,52 @@ +/*************************************************************************** + DenseMatrixRowView.h - description + ------------------- + begin : Jan 3, 2020 + copyright : (C) 2020 by Tomas Oberhuber + email : tomas.oberhuber@fjfi.cvut.cz + ***************************************************************************/ + +/* See Copyright Notice in tnl/Copyright */ + +#pragma once + +namespace TNL { + namespace Matrices { + +template< typename SegmentView, + typename ValuesView > +class DenseMatrixRowView +{ + public: + + using RealType = typename ValuesView::RealType; + using SegmentViewType = SegmentView; + using IndexType = typename SegmentViewType::IndexType; + using ValuesViewType = ValuesView; + + __cuda_callable__ + DenseMatrixRowView( const SegmentViewType& segmentView, + const ValuesViewType& values ); + + __cuda_callable__ + IndexType getSize() const; + + __cuda_callable__ + const RealType& getValue( const IndexType column ) const; + + __cuda_callable__ + RealType& getValue( const IndexType column ); + + __cuda_callable__ + void setElement( const IndexType column, + const RealType& value ); + protected: + + SegmentViewType segmentView; + + ValuesViewType values; +}; + } // namespace Matrices +} // namespace TNL + +#include <TNL/Matrices/DenseMatrixRowView.hpp> diff --git a/src/TNL/Matrices/DenseMatrixRowView.hpp b/src/TNL/Matrices/DenseMatrixRowView.hpp new file mode 100644 index 0000000000000000000000000000000000000000..1962a4d9a8eabe80f28b2e21d1f0506792949225 --- /dev/null +++ b/src/TNL/Matrices/DenseMatrixRowView.hpp @@ -0,0 +1,71 @@ +/*************************************************************************** + DenseMatrixRowView.hpp - description + ------------------- + begin : Jan 3, 2020 + copyright : (C) 2020 by Tomas Oberhuber + email : tomas.oberhuber@fjfi.cvut.cz + ***************************************************************************/ + +/* See Copyright Notice in tnl/Copyright */ + +#pragma once + +#include <TNL/Matrices/DenseMatrixRowView.h> + +namespace TNL { + namespace Matrices { + +template< typename SegmentView, + typename ValuesView > +__cuda_callable__ +DenseMatrixRowView< SegmentView, ValuesView >:: +DenseMatrixRowView( const SegmentViewType& segmentView, + const ValuesViewType& values ) + : segmentView( segmentView ), values( values ) +{ +} + +template< typename SegmentView, + typename ValuesView > +__cuda_callable__ auto +DenseMatrixRowView< SegmentView, ValuesView >:: +getSize() const -> IndexType +{ + return segmentView.getSize(); +} + +template< typename SegmentView, + typename ValuesView > +__cuda_callable__ auto +DenseMatrixRowView< SegmentView, ValuesView >:: +getValue( const IndexType column ) const -> const RealType& +{ + TNL_ASSERT_LT( column, this->getSize(), "Column index exceeds matrix row size." ); + return values[ segmentView.getGlobalIndex( column ) ]; +} + +template< typename SegmentView, + typename ValuesView > +__cuda_callable__ auto +DenseMatrixRowView< SegmentView, ValuesView >:: +getValue( const IndexType column ) -> RealType& +{ + TNL_ASSERT_LT( column, this->getSize(), "Column index exceeds matrix row size." 
); + return values[ segmentView.getGlobalIndex( column ) ]; +} + +template< typename SegmentView, + typename ValuesView > +__cuda_callable__ void +DenseMatrixRowView< SegmentView, ValuesView >:: +setElement( const IndexType column, + const RealType& value ) +{ + TNL_ASSERT_LT( column, this->getSize(), "Column index exceeds matrix row size." ); + const IndexType globalIdx = segmentView.getGlobalIndex( column ); + values[ globalIdx ] = value; +} + + + } // namespace Matrices +} // namespace TNL diff --git a/src/TNL/Matrices/DenseMatrixView.h b/src/TNL/Matrices/DenseMatrixView.h new file mode 100644 index 0000000000000000000000000000000000000000..95a7c47698fc27f7fa760a64c0176a147ebe391c --- /dev/null +++ b/src/TNL/Matrices/DenseMatrixView.h @@ -0,0 +1,197 @@ +/*************************************************************************** + DenseMatrixView.h - description + ------------------- + begin : Nov 29, 2013 + copyright : (C) 2013 by Tomas Oberhuber + email : tomas.oberhuber@fjfi.cvut.cz + ***************************************************************************/ + +/* See Copyright Notice in tnl/Copyright */ + +#pragma once + +#include <TNL/Allocators/Default.h> +#include <TNL/Devices/Host.h> +#include <TNL/Matrices/DenseMatrixRowView.h> +#include <TNL/Matrices/MatrixView.h> +#include <TNL/Containers/Segments/Ellpack.h> + +namespace TNL { +namespace Matrices { + +template< typename Real = double, + typename Device = Devices::Host, + typename Index = int, + bool RowMajorOrder = std::is_same< Device, Devices::Host >::value > +class DenseMatrixView : public MatrixView< Real, Device, Index > +{ + private: + // convenient template alias for controlling the selection of copy-assignment operator + template< typename Device2 > + using Enabler = std::enable_if< ! 
std::is_same< Device2, Device >::value >; + + // friend class will be needed for templated assignment operators + //template< typename Real2, typename Device2, typename Index2 > + //friend class Dense; + + public: + using RealType = Real; + using DeviceType = Device; + using IndexType = Index; + using BaseType = Matrix< Real, Device, Index >; + using ValuesVectorType = typename BaseType::ValuesVectorType; + using ValuesViewType = typename ValuesVectorType::ViewType; + using SegmentsType = Containers::Segments::Ellpack< DeviceType, IndexType, typename Allocators::Default< Device >::template Allocator< IndexType >, RowMajorOrder, 1 >; + using SegmentsViewType = typename SegmentsType::ViewType; + using SegmentViewType = typename SegmentsType::SegmentViewType; + using RowView = DenseMatrixRowView< SegmentViewType, ValuesViewType >; + using ViewType = DenseMatrixView< Real, Device, Index, RowMajorOrder >; + using ConstViewType = DenseMatrixView< typename std::add_const< Real >::type, Device, Index, RowMajorOrder >; + + + // TODO: remove this + using CompressedRowLengthsVector = typename Matrix< Real, Device, Index >::CompressedRowLengthsVector; + using ConstCompressedRowLengthsVectorView = typename Matrix< RealType, DeviceType, IndexType >::ConstCompressedRowLengthsVectorView; + + template< typename _Real = Real, + typename _Device = Device, + typename _Index = Index > + using Self = DenseMatrixView< _Real, _Device, _Index >; + + __cuda_callable__ + DenseMatrixView(); + + __cuda_callable__ + DenseMatrixView( const IndexType rows, + const IndexType columns, + const ValuesViewType& values, + const SegmentsViewType& segments ); + + __cuda_callable__ + DenseMatrixView( const DenseMatrixView& m ) = default; + + __cuda_callable__ + ViewType getView(); + + __cuda_callable__ + ConstViewType getConstView() const; + + static String getSerializationType(); + + virtual String getSerializationTypeVirtual() const; + + template< typename Vector > + void getCompressedRowLengths( Vector& rowLengths ) const; + + [[deprecated]] + IndexType getRowLength( const IndexType row ) const; + + IndexType getMaxRowLength() const; + + IndexType getNumberOfMatrixElements() const; + + IndexType getNumberOfNonzeroMatrixElements() const; + + void reset(); + + __cuda_callable__ + const RowView getRow( const IndexType& rowIdx ) const; + + __cuda_callable__ + RowView getRow( const IndexType& rowIdx ); + + + void setValue( const RealType& v ); + + __cuda_callable__ + Real& operator()( const IndexType row, + const IndexType column ); + + __cuda_callable__ + const Real& operator()( const IndexType row, + const IndexType column ) const; + + void setElement( const IndexType row, + const IndexType column, + const RealType& value ); + + void addElement( const IndexType row, + const IndexType column, + const RealType& value, + const RealType& thisElementMultiplicator = 1.0 ); + + Real getElement( const IndexType row, + const IndexType column ) const; + + template< typename Fetch, typename Reduce, typename Keep, typename FetchReal > + void rowsReduction( IndexType first, IndexType last, Fetch& fetch, Reduce& reduce, Keep& keep, const FetchReal& zero ) const; + + template< typename Fetch, typename Reduce, typename Keep, typename FetchReal > + void allRowsReduction( Fetch& fetch, Reduce& reduce, Keep& keep, const FetchReal& zero ) const; + + template< typename Function > + void forRows( IndexType first, IndexType last, Function& function ) const; + + template< typename Function > + void forRows( IndexType first, IndexType last, Function& 
function ); + + template< typename Function > + void forAllRows( Function& function ) const; + + template< typename Function > + void forAllRows( Function& function ); + + template< typename Vector > + __cuda_callable__ + typename Vector::RealType rowVectorProduct( const IndexType row, + const Vector& vector ) const; + + template< typename InVector, typename OutVector > + void vectorProduct( const InVector& inVector, + OutVector& outVector ) const; + + template< typename Matrix > + void addMatrix( const Matrix& matrix, + const RealType& matrixMultiplicator = 1.0, + const RealType& thisMatrixMultiplicator = 1.0 ); + + template< typename Matrix1, typename Matrix2, int tileDim = 32 > + void getMatrixProduct( const Matrix1& matrix1, + const Matrix2& matrix2, + const RealType& matrix1Multiplicator = 1.0, + const RealType& matrix2Multiplicator = 1.0 ); + + template< typename Matrix, int tileDim = 32 > + void getTransposition( const Matrix& matrix, + const RealType& matrixMultiplicator = 1.0 ); + + template< typename Vector1, typename Vector2 > + void performSORIteration( const Vector1& b, + const IndexType row, + Vector2& x, + const RealType& omega = 1.0 ) const; + + DenseMatrixView& operator=( const DenseMatrixView& matrix ); + + void save( const String& fileName ) const; + + void save( File& file ) const; + + void print( std::ostream& str ) const; + + protected: + + __cuda_callable__ + IndexType getElementIndex( const IndexType row, + const IndexType column ) const; + + //typedef DenseDeviceDependentCode< DeviceType > DeviceDependentCode; + //friend class DenseDeviceDependentCode< DeviceType >; + + SegmentsViewType segments; +}; + +} // namespace Matrices +} // namespace TNL + +#include <TNL/Matrices/DenseMatrixView.hpp> diff --git a/src/TNL/Matrices/DenseMatrixView.hpp b/src/TNL/Matrices/DenseMatrixView.hpp new file mode 100644 index 0000000000000000000000000000000000000000..01415ec21c2446a255db57aae7df04cbe5813ed8 --- /dev/null +++ b/src/TNL/Matrices/DenseMatrixView.hpp @@ -0,0 +1,703 @@ +/*************************************************************************** + DenseMatrixView.hpp - description + ------------------- + begin : Nov 29, 2013 + copyright : (C) 2013 by Tomas Oberhuber + email : tomas.oberhuber@fjfi.cvut.cz + ***************************************************************************/ + +/* See Copyright Notice in tnl/Copyright */ + +#pragma once + +#include <TNL/Assert.h> +#include <TNL/Matrices/Dense.h> +#include <TNL/Exceptions/NotImplementedError.h> + +namespace TNL { +namespace Matrices { + +template< typename Real, + typename Device, + typename Index, + bool RowMajorOrder > +__cuda_callable__ +DenseMatrixView< Real, Device, Index, RowMajorOrder >:: +DenseMatrixView() +{ +} + +template< typename Real, + typename Device, + typename Index, + bool RowMajorOrder > +__cuda_callable__ +DenseMatrixView< Real, Device, Index, RowMajorOrder >:: +DenseMatrixView( const IndexType rows, + const IndexType columns, + const ValuesViewType& values, + const SegmentsViewType& segments ) + : MatrixView< Real, Device, Index >( rows, columns, values ), segments( segments ) +{ +} + +template< typename Real, + typename Device, + typename Index, + bool RowMajorOrder > +__cuda_callable__ +auto +DenseMatrixView< Real, Device, Index, RowMajorOrder >:: +getView() -> ViewType +{ + return ViewType( this->getRows(), + this->getColumns(), + this->getValues().getView(), + this->columnIndexes.getView(), + this->segments.getView() ); +} + +template< typename Real, + typename Device, + typename Index, + 
bool RowMajorOrder > +__cuda_callable__ +auto +DenseMatrixView< Real, Device, Index, RowMajorOrder >:: +getConstView() const -> ConstViewType +{ + return ConstViewType( this->getRows(), + this->getColumns(), + this->getValues().getConstView(), + this->getColumnsIndexes().getConstView(), + this->segments.getConstView() ); +} + +template< typename Real, + typename Device, + typename Index, + bool RowMajorOrder > +String +DenseMatrixView< Real, Device, Index, RowMajorOrder >:: +getSerializationType() +{ + return String( "Matrices::Dense< " ) + + TNL::getSerializationType< RealType >() + ", [any_device], " + + TNL::getSerializationType< IndexType >() + ", " + + ( RowMajorOrder ? "true" : "false" ) + ", [any_allocator] >"; +} + +template< typename Real, + typename Device, + typename Index, + bool RowMajorOrder > +String +DenseMatrixView< Real, Device, Index, RowMajorOrder >:: +getSerializationTypeVirtual() const +{ + return this->getSerializationType(); +} + +template< typename Real, + typename Device, + typename Index, + bool RowMajorOrder > + template< typename Vector > +void +DenseMatrixView< Real, Device, Index, RowMajorOrder >:: +getCompressedRowLengths( Vector& rowLengths ) const +{ + rowLengths.setSize( this->getRows() ); + rowLengths = 0; + auto rowLengths_view = rowLengths.getView(); + auto fetch = [] __cuda_callable__ ( IndexType row, IndexType column, const RealType& value ) -> IndexType { + return ( value != 0.0 ); + }; + auto reduce = [] __cuda_callable__ ( IndexType& aux, const IndexType a ) { + aux += a; + }; + auto keep = [=] __cuda_callable__ ( const IndexType rowIdx, const IndexType value ) mutable { + rowLengths_view[ rowIdx ] = value; + }; + this->allRowsReduction( fetch, reduce, keep, 0 ); +} + +template< typename Real, + typename Device, + typename Index, + bool RowMajorOrder > +Index DenseMatrixView< Real, Device, Index, RowMajorOrder >::getRowLength( const IndexType row ) const +{ + return this->getColumns(); +} + +template< typename Real, + typename Device, + typename Index, + bool RowMajorOrder > +Index DenseMatrixView< Real, Device, Index, RowMajorOrder >::getMaxRowLength() const +{ + return this->getColumns(); +} + +template< typename Real, + typename Device, + typename Index, + bool RowMajorOrder > +Index DenseMatrixView< Real, Device, Index, RowMajorOrder >::getNumberOfMatrixElements() const +{ + return this->getRows() * this->getColumns(); +} + +template< typename Real, + typename Device, + typename Index, + bool RowMajorOrder > +Index DenseMatrixView< Real, Device, Index, RowMajorOrder >::getNumberOfNonzeroMatrixElements() const +{ + const auto values_view = this->values.getConstView(); + auto fetch = [=] __cuda_callable__ ( const IndexType i ) -> IndexType { + return ( values_view[ i ] != 0.0 ); + }; + return Algorithms::Reduction< DeviceType >::reduce( this->values.getSize(), std::plus<>{}, fetch, 0 ); +} + +template< typename Real, + typename Device, + typename Index, + bool RowMajorOrder > +void DenseMatrixView< Real, Device, Index, RowMajorOrder >::reset() +{ + Matrix< Real, Device, Index >::reset(); +} + +template< typename Real, + typename Device, + typename Index, + bool RowMajorOrder > +void DenseMatrixView< Real, Device, Index, RowMajorOrder >::setValue( const Real& value ) +{ + this->values = value; +} + +template< typename Real, + typename Device, + typename Index, + bool RowMajorOrder > +__cuda_callable__ auto +DenseMatrixView< Real, Device, Index, RowMajorOrder >:: +getRow( const IndexType& rowIdx ) const -> const RowView +{ + TNL_ASSERT_LT( rowIdx, 
this->getRows(), "Row index is larger than number of matrix rows." ); + return RowView( this->segments.getSegmentView( rowIdx ), this->values.getView() ); +} + +template< typename Real, + typename Device, + typename Index, + bool RowMajorOrder > +__cuda_callable__ auto +DenseMatrixView< Real, Device, Index, RowMajorOrder >:: +getRow( const IndexType& rowIdx ) -> RowView +{ + TNL_ASSERT_LT( rowIdx, this->getRows(), "Row index is larger than number of matrix rows." ); + return RowView( this->segments.getSegmentView( rowIdx ), this->values.getView() ); +} + +template< typename Real, + typename Device, + typename Index, + bool RowMajorOrder > +__cuda_callable__ +Real& DenseMatrixView< Real, Device, Index, RowMajorOrder >::operator()( const IndexType row, + const IndexType column ) +{ + TNL_ASSERT_GE( row, 0, "Row index must be non-negative." ); + TNL_ASSERT_LT( row, this->getRows(), "Row index is out of bounds." ); + TNL_ASSERT_GE( column, 0, "Column index must be non-negative." ); + TNL_ASSERT_LT( column, this->getColumns(), "Column index is out of bounds." ); + + return this->values.operator[]( this->getElementIndex( row, column ) ); +} + +template< typename Real, + typename Device, + typename Index, + bool RowMajorOrder > +__cuda_callable__ +const Real& DenseMatrixView< Real, Device, Index, RowMajorOrder >::operator()( const IndexType row, + const IndexType column ) const +{ + TNL_ASSERT_GE( row, 0, "Row index must be non-negative." ); + TNL_ASSERT_LT( row, this->getRows(), "Row index is out of bounds." ); + TNL_ASSERT_GE( column, 0, "Column index must be non-negative." ); + TNL_ASSERT_LT( column, this->getColumns(), "Column index is out of bounds." ); + + return this->values.operator[]( this->getElementIndex( row, column ) ); +} + +template< typename Real, + typename Device, + typename Index, + bool RowMajorOrder > +void +DenseMatrixView< Real, Device, Index, RowMajorOrder >:: +setElement( const IndexType row, + const IndexType column, + const RealType& value ) +{ + this->values.setElement( this->getElementIndex( row, column ), value ); +} + +template< typename Real, + typename Device, + typename Index, + bool RowMajorOrder > +void +DenseMatrixView< Real, Device, Index, RowMajorOrder >:: +addElement( const IndexType row, + const IndexType column, + const RealType& value, + const RealType& thisElementMultiplicator ) +{ + const IndexType elementIndex = this->getElementIndex( row, column ); + if( thisElementMultiplicator == 1.0 ) + this->values.setElement( elementIndex, + this->values.getElement( elementIndex ) + value ); + else + this->values.setElement( elementIndex, + thisElementMultiplicator * this->values.getElement( elementIndex ) + value ); +} + +template< typename Real, + typename Device, + typename Index, + bool RowMajorOrder > +Real +DenseMatrixView< Real, Device, Index, RowMajorOrder >:: +getElement( const IndexType row, + const IndexType column ) const +{ + return this->values.getElement( this->getElementIndex( row, column ) ); +} + +template< typename Real, + typename Device, + typename Index, + bool RowMajorOrder > + template< typename Fetch, typename Reduce, typename Keep, typename FetchValue > +void +DenseMatrixView< Real, Device, Index, RowMajorOrder >:: +rowsReduction( IndexType first, IndexType last, Fetch& fetch, Reduce& reduce, Keep& keep, const FetchValue& zero ) const +{ + const auto values_view = this->values.getConstView(); + auto fetch_ = [=] __cuda_callable__ ( IndexType rowIdx, IndexType columnIdx, IndexType globalIdx, bool& compute ) mutable -> decltype( fetch( 
IndexType(), IndexType(), RealType() ) ) { + return fetch( rowIdx, columnIdx, values_view[ globalIdx ] ); + return zero; + }; + this->segments.segmentsReduction( first, last, fetch_, reduce, keep, zero ); +} + +template< typename Real, + typename Device, + typename Index, + bool RowMajorOrder > + template< typename Fetch, typename Reduce, typename Keep, typename FetchReal > +void +DenseMatrixView< Real, Device, Index, RowMajorOrder >:: +allRowsReduction( Fetch& fetch, Reduce& reduce, Keep& keep, const FetchReal& zero ) const +{ + this->rowsReduction( 0, this->getRows(), fetch, reduce, keep, zero ); +} + +template< typename Real, + typename Device, + typename Index, + bool RowMajorOrder > + template< typename Function > +void +DenseMatrixView< Real, Device, Index, RowMajorOrder >:: +forRows( IndexType first, IndexType last, Function& function ) const +{ + const auto values_view = this->values.getConstView(); + auto f = [=] __cuda_callable__ ( IndexType rowIdx, IndexType columnIdx, IndexType globalIdx, bool& compute ) mutable { + function( rowIdx, columnIdx, columnIdx, values_view[ globalIdx ], compute ); + }; + this->segments.forSegments( first, last, f ); + +} + +template< typename Real, + typename Device, + typename Index, + bool RowMajorOrder > + template< typename Function > +void +DenseMatrixView< Real, Device, Index, RowMajorOrder >:: +forRows( IndexType first, IndexType last, Function& function ) +{ + auto values_view = this->values.getView(); + auto f = [=] __cuda_callable__ ( IndexType rowIdx, IndexType columnIdx, IndexType globalIdx, bool& compute ) mutable { + function( rowIdx, columnIdx, globalIdx, values_view[ globalIdx ], compute ); + }; + this->segments.forSegments( first, last, f ); + +} + +template< typename Real, + typename Device, + typename Index, + bool RowMajorOrder > + template< typename Function > +void +DenseMatrixView< Real, Device, Index, RowMajorOrder >:: +forAllRows( Function& function ) const +{ + this->forRows( 0, this->getRows(), function ); +} + +template< typename Real, + typename Device, + typename Index, + bool RowMajorOrder > + template< typename Function > +void +DenseMatrixView< Real, Device, Index, RowMajorOrder >:: +forAllRows( Function& function ) +{ + this->forRows( 0, this->getRows(), function ); +} + +template< typename Real, + typename Device, + typename Index, + bool RowMajorOrder > + template< typename Vector > +__cuda_callable__ +typename Vector::RealType +DenseMatrixView< Real, Device, Index, RowMajorOrder >:: +rowVectorProduct( const IndexType row, const Vector& vector ) const +{ + RealType sum( 0.0 ); + // TODO: Fix this + //for( IndexType column = 0; column < this->getColumns(); column++ ) + // sum += this->getElementFast( row, column ) * vector[ column ]; + return sum; +} + +template< typename Real, + typename Device, + typename Index, + bool RowMajorOrder > + template< typename InVector, + typename OutVector > +void +DenseMatrixView< Real, Device, Index, RowMajorOrder >:: +vectorProduct( const InVector& inVector, OutVector& outVector ) const +{ + TNL_ASSERT_EQ( this->getColumns(), inVector.getSize(), "Matrix columns count differs with input vector size." ); + TNL_ASSERT_EQ( this->getRows(), outVector.getSize(), "Matrix rows count differs with output vector size." 
); + + const auto inVectorView = inVector.getConstView(); + auto outVectorView = outVector.getView(); + const auto valuesView = this->values.getConstView(); + auto fetch = [=] __cuda_callable__ ( IndexType row, IndexType column, IndexType offset, bool& compute ) -> RealType { + return valuesView[ offset ] * inVectorView[ column ]; + }; + auto reduction = [] __cuda_callable__ ( RealType& sum, const RealType& value ) { + sum += value; + }; + auto keeper = [=] __cuda_callable__ ( IndexType row, const RealType& value ) mutable { + outVectorView[ row ] = value; + }; + this->segments.segmentsReduction( 0, this->getRows(), fetch, reduction, keeper, ( RealType ) 0.0 ); +} + +template< typename Real, + typename Device, + typename Index, + bool RowMajorOrder > + template< typename Matrix > +void +DenseMatrixView< Real, Device, Index, RowMajorOrder >:: +addMatrix( const Matrix& matrix, + const RealType& matrixMultiplicator, + const RealType& thisMatrixMultiplicator ) +{ + TNL_ASSERT( this->getColumns() == matrix.getColumns() && + this->getRows() == matrix.getRows(), + std::cerr << "This matrix columns: " << this->getColumns() << std::endl + << "This matrix rows: " << this->getRows() << std::endl + << "That matrix columns: " << matrix.getColumns() << std::endl + << "That matrix rows: " << matrix.getRows() << std::endl ); + + if( thisMatrixMultiplicator == 1.0 ) + this->values += matrixMultiplicator * matrix.values; + else + this->values = thisMatrixMultiplicator * this->values + matrixMultiplicator * matrix.values; +} + +template< typename Real, + typename Device, + typename Index, + bool RowMajorOrder > + template< typename Matrix1, typename Matrix2, int tileDim > +void DenseMatrixView< Real, Device, Index, RowMajorOrder >::getMatrixProduct( const Matrix1& matrix1, + const Matrix2& matrix2, + const RealType& matrix1Multiplicator, + const RealType& matrix2Multiplicator ) +{ + TNL_ASSERT( matrix1.getColumns() == matrix2.getRows() && + this->getRows() == matrix1.getRows() && + this->getColumns() == matrix2.getColumns(), + std::cerr << "This matrix columns: " << this->getColumns() << std::endl + << "This matrix rows: " << this->getRows() << std::endl + << "Matrix1 columns: " << matrix1.getColumns() << std::endl + << "Matrix1 rows: " << matrix1.getRows() << std::endl + << "Matrix2 columns: " << matrix2.getColumns() << std::endl + << "Matrix2 rows: " << matrix2.getRows() << std::endl ); + + if( std::is_same< Device, Devices::Host >::value ) + for( IndexType i = 0; i < this->getRows(); i += tileDim ) + for( IndexType j = 0; j < this->getColumns(); j += tileDim ) + { + const IndexType tileRows = min( tileDim, this->getRows() - i ); + const IndexType tileColumns = min( tileDim, this->getColumns() - j ); + for( IndexType i1 = i; i1 < i + tileRows; i1++ ) + for( IndexType j1 = j; j1 < j + tileColumns; j1++ ) + this->setElementFast( i1, j1, 0.0 ); + + for( IndexType k = 0; k < matrix1.getColumns(); k += tileDim ) + { + const IndexType lastK = min( k + tileDim, matrix1.getColumns() ); + for( IndexType i1 = 0; i1 < tileRows; i1++ ) + for( IndexType j1 = 0; j1 < tileColumns; j1++ ) + for( IndexType k1 = k; k1 < lastK; k1++ ) + this->addElementFast( i + i1, j + j1, + matrix1.getElementFast( i + i1, k1 ) * matrix2.getElementFast( k1, j + j1 ) ); + } + } + if( std::is_same< Device, Devices::Cuda >::value ) + { +#ifdef HAVE_CUDA + /*dim3 cudaBlockSize( 0 ), cudaGridSize( 0 ); + const IndexType matrixProductCudaBlockSize( 256 ); + const IndexType rowTiles = roundUpDivision( this->getRows(), tileDim ); + const IndexType 
columnTiles = roundUpDivision( this->getColumns(), tileDim ); + const IndexType cudaBlockColumns( tileDim ); + const IndexType cudaBlockRows( matrixProductCudaBlockSize / tileDim ); + cudaBlockSize.x = cudaBlockColumns; + cudaBlockSize.y = cudaBlockRows; + const IndexType rowGrids = roundUpDivision( rowTiles, Cuda::getMaxGridSize() ); + const IndexType columnGrids = roundUpDivision( columnTiles, Cuda::getMaxGridSize() ); + + for( IndexType gridIdx_x = 0; gridIdx_x < columnGrids; gridIdx_x++ ) + for( IndexType gridIdx_y = 0; gridIdx_y < rowGrids; gridIdx_y++ ) + { + cudaGridSize.x = cudaGridSize.y = Cuda::getMaxGridSize(); + if( gridIdx_x == columnGrids - 1 ) + cudaGridSize.x = columnTiles % Cuda::getMaxGridSize(); + if( gridIdx_y == rowGrids - 1 ) + cudaGridSize.y = rowTiles % Cuda::getMaxGridSize(); + Dense* this_kernel = Cuda::passToDevice( *this ); + Matrix1* matrix1_kernel = Cuda::passToDevice( matrix1 ); + Matrix2* matrix2_kernel = Cuda::passToDevice( matrix2 ); + DenseMatrixProductKernel< Real, + Index, + Matrix1, + Matrix2, + tileDim, + cudaBlockRows > + <<< cudaGridSize, + cudaBlockSize, + 3*tileDim*tileDim >>> + ( this_kernel, + matrix1_kernel, + matrix2_kernel, + matrix1Multiplicator, + matrix2Multiplicator, + gridIdx_x, + gridIdx_y ); + Cuda::freeFromDevice( this_kernel ); + Cuda::freeFromDevice( matrix1_kernel ); + Cuda::freeFromDevice( matrix2_kernel ); + }*/ +#endif + } +} + + +template< typename Real, + typename Device, + typename Index, + bool RowMajorOrder > + template< typename Matrix, int tileDim > +void DenseMatrixView< Real, Device, Index, RowMajorOrder >::getTransposition( const Matrix& matrix, + const RealType& matrixMultiplicator ) +{ + TNL_ASSERT( this->getColumns() == matrix.getRows() && + this->getRows() == matrix.getColumns(), + std::cerr << "This matrix columns: " << this->getColumns() << std::endl + << "This matrix rows: " << this->getRows() << std::endl + << "That matrix columns: " << matrix.getColumns() << std::endl + << "That matrix rows: " << matrix.getRows() << std::endl ); + + if( std::is_same< Device, Devices::Host >::value ) + { + const IndexType& rows = matrix.getRows(); + const IndexType& columns = matrix.getColumns(); + for( IndexType i = 0; i < rows; i += tileDim ) + for( IndexType j = 0; j < columns; j += tileDim ) + for( IndexType k = i; k < i + tileDim && k < rows; k++ ) + for( IndexType l = j; l < j + tileDim && l < columns; l++ ) + this->setElement( l, k, matrixMultiplicator * matrix. 
getElement( k, l ) ); + } + if( std::is_same< Device, Devices::Cuda >::value ) + { +#ifdef HAVE_CUDA + /*dim3 cudaBlockSize( 0 ), cudaGridSize( 0 ); + const IndexType matrixProductCudaBlockSize( 256 ); + const IndexType rowTiles = roundUpDivision( this->getRows(), tileDim ); + const IndexType columnTiles = roundUpDivision( this->getColumns(), tileDim ); + const IndexType cudaBlockColumns( tileDim ); + const IndexType cudaBlockRows( matrixProductCudaBlockSize / tileDim ); + cudaBlockSize.x = cudaBlockColumns; + cudaBlockSize.y = cudaBlockRows; + const IndexType rowGrids = roundUpDivision( rowTiles, Cuda::getMaxGridSize() ); + const IndexType columnGrids = roundUpDivision( columnTiles, Cuda::getMaxGridSize() ); + const IndexType sharedMemorySize = tileDim*tileDim + tileDim*tileDim/Cuda::getNumberOfSharedMemoryBanks(); + + Dense* this_device = Cuda::passToDevice( *this ); + Matrix* matrix_device = Cuda::passToDevice( matrix ); + + for( IndexType gridIdx_x = 0; gridIdx_x < columnGrids; gridIdx_x++ ) + for( IndexType gridIdx_y = 0; gridIdx_y < rowGrids; gridIdx_y++ ) + { + cudaGridSize.x = cudaGridSize.y = Cuda::getMaxGridSize(); + if( gridIdx_x == columnGrids - 1) + cudaGridSize.x = columnTiles % Cuda::getMaxGridSize(); + if( gridIdx_y == rowGrids - 1 ) + cudaGridSize.y = rowTiles % Cuda::getMaxGridSize(); + if( ( gridIdx_x < columnGrids - 1 || matrix.getColumns() % tileDim == 0 ) && + ( gridIdx_y < rowGrids - 1 || matrix.getRows() % tileDim == 0 ) ) + { + DenseTranspositionAlignedKernel< Real, + Index, + Matrix, + tileDim, + cudaBlockRows > + <<< cudaGridSize, + cudaBlockSize, + sharedMemorySize >>> + ( this_device, + matrix_device, + matrixMultiplicator, + gridIdx_x, + gridIdx_y ); + } + else + { + DenseTranspositionNonAlignedKernel< Real, + Index, + Matrix, + tileDim, + cudaBlockRows > + <<< cudaGridSize, + cudaBlockSize, + sharedMemorySize >>> + ( this_device, + matrix_device, + matrixMultiplicator, + gridIdx_x, + gridIdx_y ); + } + TNL_CHECK_CUDA_DEVICE; + } + Cuda::freeFromDevice( this_device ); + Cuda::freeFromDevice( matrix_device );*/ +#endif + } +} + +template< typename Real, + typename Device, + typename Index, + bool RowMajorOrder > + template< typename Vector1, typename Vector2 > +void DenseMatrixView< Real, Device, Index, RowMajorOrder >::performSORIteration( const Vector1& b, + const IndexType row, + Vector2& x, + const RealType& omega ) const +{ + RealType sum( 0.0 ), diagonalValue; + for( IndexType i = 0; i < this->getColumns(); i++ ) + { + if( i == row ) + diagonalValue = this->getElement( row, row ); + else + sum += this->getElement( row, i ) * x[ i ]; + } + x[ row ] = ( 1.0 - omega ) * x[ row ] + omega / diagonalValue * ( b[ row ] - sum ); +} + + +template< typename Real, + typename Device, + typename Index, + bool RowMajorOrder > +DenseMatrixView< Real, Device, Index, RowMajorOrder >& +DenseMatrixView< Real, Device, Index, RowMajorOrder >:: +operator=( const DenseMatrixView& matrix ) +{ + MatrixView< Real, Device, Index >::operator=( matrix ); + this->segments = matrix.segments; + return *this; +} + +template< typename Real, + typename Device, + typename Index, + bool RowMajorOrder > +void DenseMatrixView< Real, Device, Index, RowMajorOrder >::save( const String& fileName ) const +{ + Object::save( fileName ); +} + +template< typename Real, + typename Device, + typename Index, + bool RowMajorOrder > +void DenseMatrixView< Real, Device, Index, RowMajorOrder >::save( File& file ) const +{ + MatrixView< Real, Device, Index >::save( file ); + this->segments.save( file ); +} + 
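Editorial aside (not part of the patch): the fetch/reduce/keep interface introduced by rowsReduction() and used by getCompressedRowLengths() and vectorProduct() above generalizes to other per-row summaries. Below is a minimal sketch, assuming only the DenseMatrixView API declared in this file; the free function name rowSums is hypothetical and chosen for illustration only.

   #include <TNL/Containers/Vector.h>
   #include <TNL/Matrices/DenseMatrixView.h>

   // Collect the sum of each row of a dense matrix view via allRowsReduction().
   template< typename Real, typename Device, typename Index, bool RowMajorOrder >
   TNL::Containers::Vector< Real, Device, Index >
   rowSums( const TNL::Matrices::DenseMatrixView< Real, Device, Index, RowMajorOrder >& matrix )
   {
      TNL::Containers::Vector< Real, Device, Index > sums( matrix.getRows() );
      auto sums_view = sums.getView();
      // fetch: value of each element; reduce: accumulate within a row; keep: store the per-row result
      auto fetch  = [] __cuda_callable__ ( Index row, Index column, const Real& value ) -> Real { return value; };
      auto reduce = [] __cuda_callable__ ( Real& sum, const Real& value ) { sum += value; };
      auto keep   = [=] __cuda_callable__ ( Index row, const Real& sum ) mutable { sums_view[ row ] = sum; };
      matrix.allRowsReduction( fetch, reduce, keep, Real( 0.0 ) );
      return sums;
   }
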
+template< typename Real, + typename Device, + typename Index, + bool RowMajorOrder > +void DenseMatrixView< Real, Device, Index, RowMajorOrder >::print( std::ostream& str ) const +{ + for( IndexType row = 0; row < this->getRows(); row++ ) + { + str <<"Row: " << row << " -> "; + for( IndexType column = 0; column < this->getColumns(); column++ ) + str << " Col:" << column << "->" << this->getElement( row, column ) << "\t"; + str << std::endl; + } +} + +template< typename Real, + typename Device, + typename Index, + bool RowMajorOrder > +__cuda_callable__ +Index DenseMatrixView< Real, Device, Index, RowMajorOrder >::getElementIndex( const IndexType row, + const IndexType column ) const +{ + return this->segments.getGlobalIndex( row, column ); +} + +} // namespace Matrices +} // namespace TNL diff --git a/src/TNL/Matrices/DistributedMatrix.h b/src/TNL/Matrices/DistributedMatrix.h index 76b6ea8c1d5173ee8d0cd85421d919085fe590e5..05ee2839152940503f385d883575c9e5730041a2 100644 --- a/src/TNL/Matrices/DistributedMatrix.h +++ b/src/TNL/Matrices/DistributedMatrix.h @@ -14,7 +14,7 @@ #include <type_traits> -#include <TNL/Matrices/SparseRow.h> +#include <TNL/Matrices/Legacy/SparseRow.h> #include <TNL/Communicators/MpiCommunicator.h> #include <TNL/Containers/Subrange.h> #include <TNL/Containers/DistributedVector.h> diff --git a/src/TNL/Matrices/DistributedSpMV.h b/src/TNL/Matrices/DistributedSpMV.h index b2abd13c537dc181de638caec4b6adf06755b2bf..a4711dce0e5b1a0a984966a7f997ae6549c0b3e4 100644 --- a/src/TNL/Matrices/DistributedSpMV.h +++ b/src/TNL/Matrices/DistributedSpMV.h @@ -19,6 +19,7 @@ #include <vector> #include <utility> // std::pair #include <limits> // std::numeric_limits +#include <TNL/Allocators/Host.h> #include <TNL/Matrices/Dense.h> #include <TNL/Containers/Vector.h> #include <TNL/Containers/VectorView.h> @@ -124,8 +125,8 @@ public: preCommPatternEnds.setLike( commPatternEnds ); for( int j = 0; j < nproc; j++ ) for( int i = 0; i < nproc; i++ ) { - preCommPatternStarts.setElementFast( j, i, span_starts.getElement( i ) ); - preCommPatternEnds.setElementFast( j, i, span_ends.getElement( i ) ); + preCommPatternStarts.setElement( j, i, span_starts.getElement( i ) ); + preCommPatternEnds.setElement( j, i, span_ends.getElement( i ) ); } // assemble the commPattern* matrices @@ -175,7 +176,7 @@ public: continue; if( commPatternStarts( rank, j ) < commPatternEnds( rank, j ) ) commRequests.push_back( CommunicatorType::IRecv( - &globalBuffer[ commPatternStarts( rank, j ) ], + globalBuffer.getPointer( commPatternStarts( rank, j ) ), commPatternEnds( rank, j ) - commPatternStarts( rank, j ), j, 0, group ) ); } @@ -235,7 +236,7 @@ public: protected: // communication pattern - Matrices::Dense< IndexType, Devices::Host, int > commPatternStarts, commPatternEnds; + Matrices::Dense< IndexType, Devices::Host, int, true, Allocators::Host< IndexType > > commPatternStarts, commPatternEnds; // span of rows with only block-diagonal entries std::pair< IndexType, IndexType > localOnlySpan; diff --git a/src/TNL/Matrices/AdEllpack.h b/src/TNL/Matrices/Legacy/AdEllpack.h similarity index 95% rename from src/TNL/Matrices/AdEllpack.h rename to src/TNL/Matrices/Legacy/AdEllpack.h index f011e6c804429b4059b972b5249feaa1de5f8922..1135084ee9346375ad1f14bde3ba8453d0ac5868 100644 --- a/src/TNL/Matrices/AdEllpack.h +++ b/src/TNL/Matrices/Legacy/AdEllpack.h @@ -10,15 +10,15 @@ /**** * This class implements AdELL format from: - * - * Maggioni M., Berger-Wolf T., + * + * Maggioni M., Berger-Wolf T., * AdELL: An Adaptive 
Warp-Balancing ELL Format for Efficient Sparse Matrix-Vector Multiplication on GPUs, * In proceedings of 42nd International Conference on Parallel Processing, 2013. */ #pragma once -#include <TNL/Matrices/Sparse.h> +#include <TNL/Matrices/Legacy/Sparse.h> #include <TNL/Containers/Vector.h> namespace TNL { @@ -33,7 +33,7 @@ struct warpInfo using RealType = typename MatrixType::RealType; using DeviceType = typename MatrixType::DeviceType; using IndexType = typename MatrixType::IndexType; - + IndexType offset; IndexType rowOffset; IndexType localLoad; @@ -47,7 +47,7 @@ template< typename MatrixType > class warpList { public: - + using RealType = typename MatrixType::RealType; using DeviceType = typename MatrixType::DeviceType; using IndexType = typename MatrixType::IndexType; @@ -74,7 +74,7 @@ public: { return this->tail; } ~warpList(); - + void printList() { if( this->getHead() == this->getTail() ) @@ -114,7 +114,7 @@ private: // friend class will be needed for templated assignment operators template< typename Real2, typename Device2, typename Index2 > friend class AdEllpack; - + public: typedef Real RealType; @@ -122,6 +122,7 @@ public: typedef Index IndexType; typedef typename Sparse< RealType, DeviceType, IndexType >::CompressedRowLengthsVector CompressedRowLengthsVector; typedef typename Sparse< RealType, DeviceType, IndexType >::ConstCompressedRowLengthsVectorView ConstCompressedRowLengthsVectorView; + typedef typename Sparse< RealType, DeviceType, IndexType >::CompressedRowLengthsVectorView CompressedRowLengthsVectorView; template< typename _Real = Real, typename _Device = Device, @@ -132,6 +133,8 @@ public: void setCompressedRowLengths( ConstCompressedRowLengthsVectorView rowLengths ); + void getCompressedRowLengths( CompressedRowLengthsVectorView rowLengths ) const; + IndexType getWarp( const IndexType row ) const; IndexType getInWarpOffset( const IndexType row, @@ -143,7 +146,7 @@ public: void setLike( const AdEllpack< Real2, Device2, Index2 >& matrix ); void reset(); - + template< typename Real2, typename Device2, typename Index2 > bool operator == ( const AdEllpack< Real2, Device2, Index2 >& matrix ) const; @@ -186,7 +189,7 @@ public: typename OutVector > void vectorProduct( const InVector& inVector, OutVector& outVector ) const; - + // copy assignment AdEllpack& operator=( const AdEllpack& matrix ); @@ -194,7 +197,7 @@ public: template< typename Real2, typename Device2, typename Index2, typename = typename Enabler< Device2 >::type > AdEllpack& operator=( const AdEllpack< Real2, Device2, Index2 >& matrix ); - + void save( File& file ) const; void load( File& file ); @@ -242,29 +245,29 @@ public: void spmvCuda4( const InVector& inVector, OutVector& outVector, const int gridIdx ) const; - + template< typename InVector, typename OutVector > __device__ void spmvCuda8( const InVector& inVector, OutVector& outVector, const int gridIdx ) const; - + template< typename InVector, typename OutVector > __device__ void spmvCuda16( const InVector& inVector, OutVector& outVector, - const int gridIdx ) const; + const int gridIdx ) const; template< typename InVector, typename OutVector > __device__ void spmvCuda32( const InVector& inVector, OutVector& outVector, - const int gridIdx ) const; - - + const int gridIdx ) const; + + #endif @@ -293,4 +296,4 @@ protected: } // namespace Matrices } // namespace TNL -#include <TNL/Matrices/AdEllpack_impl.h> +#include <TNL/Matrices/Legacy/AdEllpack_impl.h> diff --git a/src/TNL/Matrices/AdEllpack_impl.h b/src/TNL/Matrices/Legacy/AdEllpack_impl.h similarity 
index 99% rename from src/TNL/Matrices/AdEllpack_impl.h rename to src/TNL/Matrices/Legacy/AdEllpack_impl.h index b7b97ff93550ef8c7289b749156e1fd5973e2f7d..242a3c81f810bd001c9beabe4c39e9048ff29e48 100644 --- a/src/TNL/Matrices/AdEllpack_impl.h +++ b/src/TNL/Matrices/Legacy/AdEllpack_impl.h @@ -8,7 +8,7 @@ /* See Copyright Notice in tnl/Copyright */ -#include <TNL/Matrices/AdEllpack.h> +#include <TNL/Matrices/Legacy/AdEllpack.h> #include <TNL/Containers/Vector.h> #include <TNL/Math.h> #include <TNL/TypeInfo.h> @@ -220,6 +220,16 @@ setCompressedRowLengths( ConstCompressedRowLengthsVectorView rowLengths ) } } +template< typename Real, + typename Device, + typename Index > +void AdEllpack< Real, Device, Index >::getCompressedRowLengths( CompressedRowLengthsVectorView rowLengths ) const +{ + TNL_ASSERT_EQ( rowLengths.getSize(), this->getRows(), "invalid size of the rowLengths vector" ); + for( IndexType row = 0; row < this->getRows(); row++ ) + rowLengths.setElement( row, this->getRowLength( row ) ); +} + template< typename Real, typename Device, typename Index > diff --git a/src/TNL/Matrices/BiEllpack.h b/src/TNL/Matrices/Legacy/BiEllpack.h similarity index 95% rename from src/TNL/Matrices/BiEllpack.h rename to src/TNL/Matrices/Legacy/BiEllpack.h index 3ec4b662fe19979939006a5cd011d037501fdb10..1a92581c71386e31d09b4bd811792fc6a5e6f493 100644 --- a/src/TNL/Matrices/BiEllpack.h +++ b/src/TNL/Matrices/Legacy/BiEllpack.h @@ -18,7 +18,7 @@ #pragma once -#include <TNL/Matrices/Sparse.h> +#include <TNL/Matrices/Legacy/Sparse.h> #include <TNL/Containers/Vector.h> namespace TNL { @@ -32,7 +32,7 @@ template< typename Real, typename Device, typename Index > class BiEllpack : public Sparse< Real, Device, Index > { private: - + // convenient template alias for controlling the selection of copy-assignment operator template< typename Device2 > using Enabler = std::enable_if< ! 
std::is_same< Device2, Device >::value >; @@ -40,13 +40,14 @@ private: // friend class will be needed for templated assignment operators template< typename Real2, typename Device2, typename Index2 > friend class BiEllpack; - + public: typedef Real RealType; typedef Device DeviceType; typedef Index IndexType; typedef typename Sparse< RealType, DeviceType, IndexType >::CompressedRowLengthsVector CompressedRowLengthsVector; typedef typename Sparse< RealType, DeviceType, IndexType >::ConstCompressedRowLengthsVectorView ConstCompressedRowLengthsVectorView; + typedef typename Sparse< RealType, DeviceType, IndexType >::CompressedRowLengthsVectorView CompressedRowLengthsVectorView; typedef typename Sparse< RealType, DeviceType, IndexType >::ValuesVector ValuesVector; typedef typename Sparse< RealType, DeviceType, IndexType >::ColumnIndexesVector ColumnIndexesVector; @@ -62,15 +63,17 @@ public: void setCompressedRowLengths( ConstCompressedRowLengthsVectorView rowLengths ); + void getCompressedRowLengths( CompressedRowLengthsVectorView rowLengths ) const; + IndexType getRowLength( const IndexType row ) const; template< typename Real2, typename Device2, typename Index2 > void setLike( const BiEllpack< Real2, Device2, Index2 >& matrix ); - + void reset(); - + template< typename Real2, typename Device2, typename Index2 > bool operator == ( const BiEllpack< Real2, Device2, Index2 >& matrix ) const; @@ -142,7 +145,7 @@ public: IndexType getNumberOfGroups( const IndexType row ) const; bool vectorProductTest() const; - + // copy assignment BiEllpack& operator=( const BiEllpack& matrix ); @@ -160,7 +163,7 @@ public: void load( const String& fileName ); void print( std::ostream& str ) const; - + void printValues() const; void performRowBubbleSort( Containers::Vector< Index, Device, Index >& tempRowLengths ); @@ -217,5 +220,5 @@ private: } //namespace Matrices } // namespace TNL -#include <TNL/Matrices/BiEllpack_impl.h> +#include <TNL/Matrices/Legacy/BiEllpack_impl.h> diff --git a/src/TNL/Matrices/BiEllpackSymmetric.h b/src/TNL/Matrices/Legacy/BiEllpackSymmetric.h similarity index 100% rename from src/TNL/Matrices/BiEllpackSymmetric.h rename to src/TNL/Matrices/Legacy/BiEllpackSymmetric.h diff --git a/src/TNL/Matrices/BiEllpackSymmetric_impl.h b/src/TNL/Matrices/Legacy/BiEllpackSymmetric_impl.h similarity index 100% rename from src/TNL/Matrices/BiEllpackSymmetric_impl.h rename to src/TNL/Matrices/Legacy/BiEllpackSymmetric_impl.h diff --git a/src/TNL/Matrices/BiEllpack_impl.h b/src/TNL/Matrices/Legacy/BiEllpack_impl.h similarity index 98% rename from src/TNL/Matrices/BiEllpack_impl.h rename to src/TNL/Matrices/Legacy/BiEllpack_impl.h index c659b758e9cffe531a101baf8fe3cd812436fe2c..6db2ed6095926d2bcdb3950e996019756dacd422 100644 --- a/src/TNL/Matrices/BiEllpack_impl.h +++ b/src/TNL/Matrices/Legacy/BiEllpack_impl.h @@ -11,7 +11,7 @@ #pragma once -#include <TNL/Matrices/BiEllpack.h> +#include <TNL/Matrices/Legacy/BiEllpack.h> #include <TNL/Containers/Vector.h> #include <TNL/Math.h> #include <cstdio> @@ -78,9 +78,9 @@ setCompressedRowLengths( ConstCompressedRowLengthsVectorView constRowLengths ) CompressedRowLengthsVector rowLengths; rowLengths.reset(); rowLengths.setLike( constRowLengths ); - + rowLengths = constRowLengths; - + if( this->getRows() % this->warpSize != 0 ) this->setVirtualRows( this->getRows() + this->warpSize - ( this->getRows() % this->warpSize ) ); else @@ -88,7 +88,7 @@ setCompressedRowLengths( ConstCompressedRowLengthsVectorView constRowLengths ) IndexType strips = this->virtualRows / 
this->warpSize; this->rowPermArray.setSize( this->rows ); this->groupPointers.setSize( strips * ( this->logWarpSize + 1 ) + 1 ); - + this->groupPointers.setValue( 0 ); DeviceDependentCode::performRowBubbleSort( *this, rowLengths ); @@ -103,6 +103,16 @@ setCompressedRowLengths( ConstCompressedRowLengthsVectorView constRowLengths ) return this->allocateMatrixElements( this->warpSize * this->groupPointers.getElement( strips * ( this->logWarpSize + 1 ) ) ); } +template< typename Real, + typename Device, + typename Index > +void BiEllpack< Real, Device, Index >::getCompressedRowLengths( CompressedRowLengthsVectorView rowLengths ) const +{ + TNL_ASSERT_EQ( rowLengths.getSize(), this->getRows(), "invalid size of the rowLengths vector" ); + for( IndexType row = 0; row < this->getRows(); row++ ) + rowLengths.setElement( row, this->getRowLength( row ) ); +} + template< typename Real, typename Device, typename Index > @@ -128,7 +138,7 @@ Index BiEllpack< Real, Device, Index >::getNumberOfGroups( const IndexType row ) IndexType strip = row / this->warpSize; IndexType rowStripPermutation = this->rowPermArray.getElement( row ) - this->warpSize * strip; IndexType numberOfGroups = this->logWarpSize + 1; - IndexType bisection = 1; + IndexType bisection = 1; for( IndexType i = 0; i < this->logWarpSize + 1; i++ ) { if( rowStripPermutation < bisection ) @@ -148,7 +158,7 @@ template< typename Real, typename Index > Index BiEllpack< Real, Device, Index >::getRowLength( const IndexType row ) const { - TNL_ASSERT( row >= 0 && row < this->getRows(), + TNL_ASSERT( row >= 0 && row < this->getRows(), std::cerr << "row = " << row << " this->getRows() = " << this->getRows() ); const IndexType strip = row / this->warpSize; @@ -182,7 +192,7 @@ template< typename Real, typename Device2, typename Index2 > void BiEllpack< Real, Device, Index >::setLike( const BiEllpack< Real2, Device2, Index2 >& matrix ) -{ +{ Sparse< Real, Device, Index >::setLike( matrix ); this->rowPermArray.setLike( matrix.rowPermArray ); this->groupPointers.setLike( matrix.groupPointers ); @@ -212,9 +222,9 @@ bool BiEllpack< Real, Device, Index >::operator == ( const BiEllpack< Real2, Dev << " matrix.getRows() = " << matrix.getRows() << " this->getColumns() = " << this->getColumns() << " matrix.getColumns() = " << matrix.getColumns() ); - + TNL_ASSERT_TRUE( false, "operator == is not yet implemented for BiEllpack."); - + // TODO: implement this return false; } @@ -284,10 +294,10 @@ bool BiEllpack< Real, Device, Index >::addElement( const IndexType row, const RealType& value, const RealType& thisElementMultiplicator ) { - const IndexType strip = row / this->warpSize; - const IndexType groupBegin = strip * ( this->logWarpSize + 1 ); - const IndexType rowStripPerm = this->rowPermArray.getElement( row ) - strip * this->warpSize; - IndexType elementPtr = this->groupPointers.getElement( groupBegin ) * this->warpSize + rowStripPerm; + const IndexType strip = row / this->warpSize; + const IndexType groupBegin = strip * ( this->logWarpSize + 1 ); + const IndexType rowStripPerm = this->rowPermArray.getElement( row ) - strip * this->warpSize; + IndexType elementPtr = this->groupPointers.getElement( groupBegin ) * this->warpSize + rowStripPerm; IndexType rowMultiplicator = 1; IndexType step = this->warpSize; @@ -685,7 +695,7 @@ BiEllpack< Real, Device, Index >::operator=( const BiEllpack< Real2, Device2, In "unknown device" ); static_assert( std::is_same< Device2, Devices::Host >::value || std::is_same< Device2, Devices::Cuda >::value, "unknown device" ); - + 
this->setLike( matrix ); this->values = matrix.values; this->columnIndexes = matrix.columnIndexes; @@ -777,14 +787,14 @@ void BiEllpack< Real, Device, Index >::printValues() const { for( Index i = 0; i < this->values.getSize(); i++ ) { if( this->columnIndexes.getElement( i ) != this->getColumns() ) - std::cout << "values.getElement( " << i << " ) = " << this->values.getElement( i ) + std::cout << "values.getElement( " << i << " ) = " << this->values.getElement( i ) << "\tcolumnIndexes.getElement( " << i << " ) = " << this->columnIndexes.getElement( i ) << std::endl; } - + for( Index i = 0; i < this->rowPermArray.getSize(); i++ ) { std::cout << "rowPermArray[ " << i << " ] = " << this->rowPermArray.getElement( i ) << std::endl; } - + for( Index i = 0; i < this->groupPointers.getSize(); i++ ) { std::cout << "groupPointers[ " << i << " ] = " << this->groupPointers.getElement( i ) << std::endl; } @@ -1146,7 +1156,7 @@ void BiEllpack< Real, Device, Index >::spmvCuda( const InVector& inVector, __syncthreads(); if( warpStart + inWarpIdx >= this->getRows() ) return; - + outVector[ warpStart + inWarpIdx ] = results[ this->rowPermArray[ warpStart + inWarpIdx ] & ( cudaBlockSize - 1 ) ]; } #endif @@ -1321,7 +1331,7 @@ public: const Index begin = matrix.groupPointers.getElement( groupBegin ) * matrix.warpSize + rowStripPerm * stripLength; Index elementPtr = begin; Index rowLength = 0; - + for( Index group = 0; group < matrix.getNumberOfGroups( row ); group++ ) { for( Index i = 0; i < matrix.getGroupLength( strip, group ); i++ ) diff --git a/src/TNL/Matrices/CSR.h b/src/TNL/Matrices/Legacy/CSR.h similarity index 96% rename from src/TNL/Matrices/CSR.h rename to src/TNL/Matrices/Legacy/CSR.h index 485176d1d849b4be2c296a0f131f5ee2299f89f2..a31f3ee76ed3fc18925212b93e3c4c14837d0a36 100644 --- a/src/TNL/Matrices/CSR.h +++ b/src/TNL/Matrices/Legacy/CSR.h @@ -8,9 +8,9 @@ /* See Copyright Notice in tnl/Copyright */ -#pragma once +#pragma once -#include <TNL/Matrices/Sparse.h> +#include <TNL/Matrices/Legacy/Sparse.h> #include <TNL/Containers/Vector.h> #include <TNL/Devices/Cuda.h> @@ -18,7 +18,7 @@ namespace TNL { namespace Matrices { - + #ifdef HAVE_UMFPACK template< typename Matrix, typename Preconditioner > class UmfpackWrapper; @@ -48,6 +48,7 @@ public: using DeviceType = Device; using IndexType = Index; typedef typename Sparse< RealType, DeviceType, IndexType >::CompressedRowLengthsVector CompressedRowLengthsVector; + typedef typename Sparse< RealType, DeviceType, IndexType >::CompressedRowLengthsVectorView CompressedRowLengthsVectorView; typedef typename Sparse< RealType, DeviceType, IndexType >::ConstCompressedRowLengthsVectorView ConstCompressedRowLengthsVectorView; typedef Sparse< Real, Device, Index > BaseType; using MatrixRow = typename BaseType::MatrixRow; @@ -71,13 +72,15 @@ public: void setCompressedRowLengths( ConstCompressedRowLengthsVectorView rowLengths ); + void getCompressedRowLengths( CompressedRowLengthsVectorView rowLengths ) const; + IndexType getRowLength( const IndexType row ) const; __cuda_callable__ IndexType getRowLengthFast( const IndexType row ) const; IndexType getNonZeroRowLength( const IndexType row ) const; - + __cuda_callable__ IndexType getNonZeroRowLengthFast( const IndexType row ) const; @@ -264,7 +267,7 @@ protected: int cudaWarpSize, hybridModeSplit; typedef CSRDeviceDependentCode< DeviceType > DeviceDependentCode; - + friend class CSRDeviceDependentCode< DeviceType >; friend class CusparseCSR< RealType >; }; @@ -272,4 +275,4 @@ protected: } // namespace Matrices } // 
namespace TNL -#include <TNL/Matrices/CSR_impl.h> +#include <TNL/Matrices/Legacy/CSR_impl.h> diff --git a/src/TNL/Matrices/CSR_impl.h b/src/TNL/Matrices/Legacy/CSR_impl.h similarity index 98% rename from src/TNL/Matrices/CSR_impl.h rename to src/TNL/Matrices/Legacy/CSR_impl.h index db31d6dcde6a07cd8b19e87f843f3b6e8b994c5c..5fec923f0333ff6b6df3591ac526366bb0bc27de 100644 --- a/src/TNL/Matrices/CSR_impl.h +++ b/src/TNL/Matrices/Legacy/CSR_impl.h @@ -10,7 +10,7 @@ #pragma once -#include <TNL/Matrices/CSR.h> +#include <TNL/Matrices/Legacy/CSR.h> #include <TNL/Containers/VectorView.h> #include <TNL/Math.h> #include <TNL/Exceptions/NotImplementedError.h> @@ -20,7 +20,7 @@ #endif namespace TNL { -namespace Matrices { +namespace Matrices { #ifdef HAVE_CUSPARSE template< typename Real, typename Index > @@ -99,6 +99,16 @@ void CSR< Real, Device, Index >::setCompressedRowLengths( ConstCompressedRowLeng this->columnIndexes.setValue( this->columns ); } +template< typename Real, + typename Device, + typename Index > +void CSR< Real, Device, Index >::getCompressedRowLengths( CompressedRowLengthsVectorView rowLengths ) const +{ + TNL_ASSERT_EQ( rowLengths.getSize(), this->getRows(), "invalid size of the rowLengths vector" ); + for( IndexType row = 0; row < this->getRows(); row++ ) + rowLengths.setElement( row, this->getRowLength( row ) ); +} + template< typename Real, typename Device, typename Index > @@ -131,7 +141,7 @@ template< typename Real, typename Index > __cuda_callable__ Index CSR< Real, Device, Index >::getNonZeroRowLengthFast( const IndexType row ) const -{ +{ ConstMatrixRow matrixRow = this->getRow( row ); return matrixRow.getNonZeroElementsCount(); } @@ -884,10 +894,10 @@ template<> class tnlCusparseCSRWrapper< float, int > { public: - + typedef float Real; typedef int Index; - + static void vectorProduct( const Index rows, const Index columns, const Index nnz, @@ -924,10 +934,10 @@ template<> class tnlCusparseCSRWrapper< double, int > { public: - + typedef double Real; typedef int Index; - + static void vectorProduct( const Index rows, const Index columns, const Index nnz, diff --git a/src/TNL/Matrices/ChunkedEllpack.h b/src/TNL/Matrices/Legacy/ChunkedEllpack.h similarity index 99% rename from src/TNL/Matrices/ChunkedEllpack.h rename to src/TNL/Matrices/Legacy/ChunkedEllpack.h index 9d422079608f52e7e89a9954496cbd22c0786c06..a0f55b3263d0911455318886cc680f5242de820b 100644 --- a/src/TNL/Matrices/ChunkedEllpack.h +++ b/src/TNL/Matrices/Legacy/ChunkedEllpack.h @@ -22,7 +22,7 @@ #pragma once -#include <TNL/Matrices/Sparse.h> +#include <TNL/Matrices/Legacy/Sparse.h> #include <TNL/Containers/Vector.h> namespace TNL { @@ -352,5 +352,5 @@ protected: } // namespace Matrices } // namespace TNL -#include <TNL/Matrices/ChunkedEllpack_impl.h> +#include <TNL/Matrices/Legacy/ChunkedEllpack_impl.h> diff --git a/src/TNL/Matrices/ChunkedEllpack_impl.h b/src/TNL/Matrices/Legacy/ChunkedEllpack_impl.h similarity index 99% rename from src/TNL/Matrices/ChunkedEllpack_impl.h rename to src/TNL/Matrices/Legacy/ChunkedEllpack_impl.h index 3b1fd9c8f9fce07344115282ba98411d364d95e3..4061597524742923f03a4115a52a16a2f44bb0ae 100644 --- a/src/TNL/Matrices/ChunkedEllpack_impl.h +++ b/src/TNL/Matrices/Legacy/ChunkedEllpack_impl.h @@ -10,7 +10,7 @@ #pragma once -#include <TNL/Matrices/ChunkedEllpack.h> +#include <TNL/Matrices/Legacy/ChunkedEllpack.h> #include <TNL/Containers/Vector.h> #include <TNL/Math.h> #include <TNL/Exceptions/NotImplementedError.h> diff --git a/src/TNL/Matrices/Ellpack.h b/src/TNL/Matrices/Legacy/Ellpack.h 
similarity index 95% rename from src/TNL/Matrices/Ellpack.h rename to src/TNL/Matrices/Legacy/Ellpack.h index 6536f5f6ca6ffa7869851e2ad0883c51de83ed28..eea58b7571b1b6dd2416fbca0b3897f9782539d6 100644 --- a/src/TNL/Matrices/Ellpack.h +++ b/src/TNL/Matrices/Legacy/Ellpack.h @@ -10,11 +10,11 @@ #pragma once -#include <TNL/Matrices/Sparse.h> +#include <TNL/Matrices/Legacy/Sparse.h> #include <TNL/Containers/Vector.h> namespace TNL { -namespace Matrices { +namespace Matrices { template< typename Device > class EllpackDeviceDependentCode; @@ -37,6 +37,7 @@ public: typedef Index IndexType; typedef typename Sparse< RealType, DeviceType, IndexType >::CompressedRowLengthsVector CompressedRowLengthsVector; typedef typename Sparse< RealType, DeviceType, IndexType >::ConstCompressedRowLengthsVectorView ConstCompressedRowLengthsVectorView; + typedef typename Sparse< RealType, DeviceType, IndexType >::CompressedRowLengthsVectorView CompressedRowLengthsVectorView; typedef typename Sparse< RealType, DeviceType, IndexType >::ValuesVector ValuesVector; typedef typename Sparse< RealType, DeviceType, IndexType >::ColumnIndexesVector ColumnIndexesVector; typedef Sparse< Real, Device, Index > BaseType; @@ -59,20 +60,22 @@ public: void setCompressedRowLengths( ConstCompressedRowLengthsVectorView rowLengths ); + void getCompressedRowLengths( CompressedRowLengthsVectorView rowLengths ) const; + void setConstantCompressedRowLengths( const IndexType& rowLengths ); IndexType getRowLength( const IndexType row ) const; __cuda_callable__ IndexType getRowLengthFast( const IndexType row ) const; - + IndexType getNonZeroRowLength( const IndexType row ) const; template< typename Real2, typename Device2, typename Index2 > void setLike( const Ellpack< Real2, Device2, Index2 >& matrix ); void reset(); - + template< typename Real2, typename Device2, typename Index2 > bool operator == ( const Ellpack< Real2, Device2, Index2 >& matrix ) const; @@ -175,9 +178,9 @@ public: const Vector& old_x, Vector& x, const RealType& omega ) const; - + // copy assignment - Ellpack& operator=( const Ellpack& matrix ); + Ellpack& operator=( const Ellpack& matrix ); // cross-device copy assignment template< typename Real2, typename Device2, typename Index2, @@ -207,4 +210,4 @@ protected: } // namespace Matrices } // namespace TNL -#include <TNL/Matrices/Ellpack_impl.h> +#include <TNL/Matrices/Legacy/Ellpack_impl.h> diff --git a/src/TNL/Matrices/EllpackSymmetric.h b/src/TNL/Matrices/Legacy/EllpackSymmetric.h similarity index 100% rename from src/TNL/Matrices/EllpackSymmetric.h rename to src/TNL/Matrices/Legacy/EllpackSymmetric.h diff --git a/src/TNL/Matrices/EllpackSymmetricGraph.h b/src/TNL/Matrices/Legacy/EllpackSymmetricGraph.h similarity index 100% rename from src/TNL/Matrices/EllpackSymmetricGraph.h rename to src/TNL/Matrices/Legacy/EllpackSymmetricGraph.h diff --git a/src/TNL/Matrices/EllpackSymmetricGraph_impl.h b/src/TNL/Matrices/Legacy/EllpackSymmetricGraph_impl.h similarity index 100% rename from src/TNL/Matrices/EllpackSymmetricGraph_impl.h rename to src/TNL/Matrices/Legacy/EllpackSymmetricGraph_impl.h diff --git a/src/TNL/Matrices/EllpackSymmetric_impl.h b/src/TNL/Matrices/Legacy/EllpackSymmetric_impl.h similarity index 100% rename from src/TNL/Matrices/EllpackSymmetric_impl.h rename to src/TNL/Matrices/Legacy/EllpackSymmetric_impl.h diff --git a/src/TNL/Matrices/Ellpack_impl.h b/src/TNL/Matrices/Legacy/Ellpack_impl.h similarity index 98% rename from src/TNL/Matrices/Ellpack_impl.h rename to src/TNL/Matrices/Legacy/Ellpack_impl.h index 
5ae12f408727bd1ae2f087f69fcb5bae2458fd55..04ca10385a67cf2782460abef6a4dd797ef95082 100644 --- a/src/TNL/Matrices/Ellpack_impl.h +++ b/src/TNL/Matrices/Legacy/Ellpack_impl.h @@ -10,7 +10,7 @@ #pragma once -#include <TNL/Matrices/Ellpack.h> +#include <TNL/Matrices/Legacy/Ellpack.h> #include <TNL/Containers/Vector.h> #include <TNL/Math.h> #include <TNL/Exceptions/NotImplementedError.h> @@ -33,7 +33,7 @@ String Ellpack< Real, Device, Index >::getSerializationType() { return String( "Matrices::Ellpack< " ) + String( TNL::getType< Real >() ) + - ", [any device], " + + ", [any device], " + getType< Index >() + String( " >" ); } @@ -66,7 +66,7 @@ void Ellpack< Real, Device, Index >::setDimensions( const IndexType rows, IndexType missingRows = this->rows - this->alignedRows; missingRows = roundToMultiple( missingRows, Cuda::getWarpSize() ); - + this->alignedRows += missingRows; } } @@ -86,10 +86,20 @@ void Ellpack< Real, Device, Index >::setCompressedRowLengths( ConstCompressedRow TNL_ASSERT_EQ( this->getRows(), rowLengths.getSize(), "wrong size of the rowLengths vector" ); this->rowLengths = this->maxRowLength = max( rowLengths ); - + allocateElements(); } +template< typename Real, + typename Device, + typename Index > +void Ellpack< Real, Device, Index >::getCompressedRowLengths( CompressedRowLengthsVectorView rowLengths ) const +{ + TNL_ASSERT_EQ( rowLengths.getSize(), this->getRows(), "invalid size of the rowLengths vector" ); + for( IndexType row = 0; row < this->getRows(); row++ ) + rowLengths.setElement( row, this->getRowLength( row ) ); +} + template< typename Real, typename Device, typename Index > @@ -769,13 +779,13 @@ template< typename Real, void Ellpack< Real, Device, Index >::allocateElements() { IndexType numMtxElmnts = this->alignedRows * this->rowLengths; - + if( this->alignedRows != 0 ) { - TNL_ASSERT_EQ( numMtxElmnts / this->alignedRows, this->rowLengths, + TNL_ASSERT_EQ( numMtxElmnts / this->alignedRows, this->rowLengths, "Ellpack cannot store this matrix. The number of matrix elements has overflown the value that IndexType is capable of storing" ); } - + Sparse< Real, Device, Index >::allocateMatrixElements( this->alignedRows * this->rowLengths ); } diff --git a/src/TNL/Matrices/Legacy/Multidiagonal.h b/src/TNL/Matrices/Legacy/Multidiagonal.h new file mode 100644 index 0000000000000000000000000000000000000000..d9f1379f793f660de1c138609fd085d64909198b --- /dev/null +++ b/src/TNL/Matrices/Legacy/Multidiagonal.h @@ -0,0 +1,224 @@ +/*************************************************************************** + Multidiagonal.h - description + ------------------- + begin : Oct 13, 2011 + copyright : (C) 2011 by Tomas Oberhuber + email : tomas.oberhuber@fjfi.cvut.cz + ***************************************************************************/ + +/* See Copyright Notice in tnl/Copyright */ + +#pragma once + +#include <TNL/Matrices/Matrix.h> +#include <TNL/Containers/Vector.h> +#include <TNL/Matrices/Legacy/MultidiagonalRow.h> + +namespace TNL { +namespace Matrices { + +template< typename Device > +class MultidiagonalDeviceDependentCode; + +template< typename Real, typename Device = Devices::Host, typename Index = int > +class Multidiagonal : public Matrix< Real, Device, Index > +{ +private: + // convenient template alias for controlling the selection of copy-assignment operator + template< typename Device2 > + using Enabler = std::enable_if< ! 
std::is_same< Device2, Device >::value >; + + // friend class will be needed for templated assignment operators + template< typename Real2, typename Device2, typename Index2 > + friend class Multidiagonal; + +public: + typedef Real RealType; + typedef Device DeviceType; + typedef Index IndexType; + typedef typename Matrix< Real, Device, Index >::CompressedRowLengthsVector CompressedRowLengthsVector; + typedef typename Matrix< Real, Device, Index >::ConstCompressedRowLengthsVectorView ConstCompressedRowLengthsVectorView; + typedef Matrix< Real, Device, Index > BaseType; + typedef MultidiagonalRow< Real, Index > MatrixRow; + + template< typename _Real = Real, + typename _Device = Device, + typename _Index = Index > + using Self = Multidiagonal< _Real, _Device, _Index >; + + Multidiagonal(); + + static String getSerializationType(); + + virtual String getSerializationTypeVirtual() const; + + void setDimensions( const IndexType rows, + const IndexType columns ); + + void setCompressedRowLengths( ConstCompressedRowLengthsVectorView rowLengths ); + + IndexType getRowLength( const IndexType row ) const; + + __cuda_callable__ + IndexType getRowLengthFast( const IndexType row ) const; + + IndexType getMaxRowLength() const; + + template< typename Vector > + void setDiagonals( const Vector& diagonals ); + + const Containers::Vector< Index, Device, Index >& getDiagonals() const; + + template< typename Real2, typename Device2, typename Index2 > + void setLike( const Multidiagonal< Real2, Device2, Index2 >& matrix ); + + IndexType getNumberOfMatrixElements() const; + + IndexType getNumberOfNonzeroMatrixElements() const; + + IndexType getMaxRowlength() const; + + void reset(); + + template< typename Real2, typename Device2, typename Index2 > + bool operator == ( const Multidiagonal< Real2, Device2, Index2 >& matrix ) const; + + template< typename Real2, typename Device2, typename Index2 > + bool operator != ( const Multidiagonal< Real2, Device2, Index2 >& matrix ) const; + + void setValue( const RealType& v ); + + __cuda_callable__ + bool setElementFast( const IndexType row, + const IndexType column, + const RealType& value ); + + bool setElement( const IndexType row, + const IndexType column, + const RealType& value ); + + __cuda_callable__ + bool addElementFast( const IndexType row, + const IndexType column, + const RealType& value, + const RealType& thisElementMultiplicator = 1.0 ); + + bool addElement( const IndexType row, + const IndexType column, + const RealType& value, + const RealType& thisElementMultiplicator = 1.0 ); + + + __cuda_callable__ + bool setRowFast( const IndexType row, + const IndexType* columns, + const RealType* values, + const IndexType numberOfElements ); + + bool setRow( const IndexType row, + const IndexType* columns, + const RealType* values, + const IndexType numberOfElements ); + + + __cuda_callable__ + bool addRowFast( const IndexType row, + const IndexType* columns, + const RealType* values, + const IndexType numberOfElements, + const RealType& thisElementMultiplicator = 1.0 ); + + bool addRow( const IndexType row, + const IndexType* columns, + const RealType* values, + const IndexType numberOfElements, + const RealType& thisElementMultiplicator = 1.0 ); + + __cuda_callable__ + RealType getElementFast( const IndexType row, + const IndexType column ) const; + + RealType getElement( const IndexType row, + const IndexType column ) const; + + __cuda_callable__ + void getRowFast( const IndexType row, + IndexType* columns, + RealType* values ) const; + + /*void getRow( const 
IndexType row, + IndexType* columns, + RealType* values ) const;*/ + + __cuda_callable__ + MatrixRow getRow( const IndexType rowIndex ); + + __cuda_callable__ + const MatrixRow getRow( const IndexType rowIndex ) const; + + template< typename Vector > + __cuda_callable__ + typename Vector::RealType rowVectorProduct( const IndexType row, + const Vector& vector ) const; + + template< typename InVector, + typename OutVector > + void vectorProduct( const InVector& inVector, + OutVector& outVector ) const; + + template< typename Real2, typename Index2 > + void addMatrix( const Multidiagonal< Real2, Device, Index2 >& matrix, + const RealType& matrixMultiplicator = 1.0, + const RealType& thisMatrixMultiplicator = 1.0 ); + + template< typename Real2, typename Index2 > + void getTransposition( const Multidiagonal< Real2, Device, Index2 >& matrix, + const RealType& matrixMultiplicator = 1.0 ); + + template< typename Vector1, typename Vector2 > + bool performSORIteration( const Vector1& b, + const IndexType row, + Vector2& x, + const RealType& omega = 1.0 ) const; + + // copy assignment + Multidiagonal& operator=( const Multidiagonal& matrix ); + + // cross-device copy assignment + template< typename Real2, typename Device2, typename Index2, + typename = typename Enabler< Device2 >::type > + Multidiagonal& operator=( const Multidiagonal< Real2, Device2, Index2 >& matrix ); + + void save( File& file ) const; + + void load( File& file ); + + void save( const String& fileName ) const; + + void load( const String& fileName ); + + void print( std::ostream& str ) const; + +protected: + + bool getElementIndex( const IndexType row, + const IndexType column, + IndexType& index ) const; + + __cuda_callable__ + bool getElementIndexFast( const IndexType row, + const IndexType column, + IndexType& index ) const; + + Containers::Vector< Real, Device, Index > values; + + Containers::Vector< Index, Device, Index > diagonalsShift; + + typedef MultidiagonalDeviceDependentCode< DeviceType > DeviceDependentCode; + friend class MultidiagonalDeviceDependentCode< DeviceType >; +}; + +} // namespace Matrices +} // namespace TNL + +#include <TNL/Matrices/Legacy/Multidiagonal_impl.h> diff --git a/src/TNL/Matrices/MultidiagonalMatrixSetter.h b/src/TNL/Matrices/Legacy/MultidiagonalMatrixSetter.h similarity index 96% rename from src/TNL/Matrices/MultidiagonalMatrixSetter.h rename to src/TNL/Matrices/Legacy/MultidiagonalMatrixSetter.h index c10d0cc57e7ebf4282158fa6720bf6388c7b9c05..f9e7ef135420f417ab34d6f182dd569d74f30768 100644 --- a/src/TNL/Matrices/MultidiagonalMatrixSetter.h +++ b/src/TNL/Matrices/Legacy/MultidiagonalMatrixSetter.h @@ -11,7 +11,7 @@ #pragma once #include <TNL/Meshes/Grid.h> -#include <TNL/Matrices/Multidiagonal.h> +#include <TNL/Matrices/Legacy/Multidiagonal.h> namespace TNL { namespace Matrices { @@ -85,4 +85,4 @@ class MultidiagonalMatrixSetter< Meshes::Grid< 3, MeshReal, Device, MeshIndex > } // namespace Matrices } // namespace TNL -#include <TNL/Matrices/MultidiagonalMatrixSetter_impl.h> +#include <TNL/Matrices/Legacy/MultidiagonalMatrixSetter_impl.h> diff --git a/src/TNL/Matrices/MultidiagonalMatrixSetter_impl.h b/src/TNL/Matrices/Legacy/MultidiagonalMatrixSetter_impl.h similarity index 100% rename from src/TNL/Matrices/MultidiagonalMatrixSetter_impl.h rename to src/TNL/Matrices/Legacy/MultidiagonalMatrixSetter_impl.h diff --git a/src/TNL/Matrices/MultidiagonalRow.h b/src/TNL/Matrices/Legacy/MultidiagonalRow.h similarity index 96% rename from src/TNL/Matrices/MultidiagonalRow.h rename to 
src/TNL/Matrices/Legacy/MultidiagonalRow.h index 1d465d2296e9eb86dd3f376713d4e69fa172a09e..c41541eade10ca6b3fa2b98900dffadbc4e62fa9 100644 --- a/src/TNL/Matrices/MultidiagonalRow.h +++ b/src/TNL/Matrices/Legacy/MultidiagonalRow.h @@ -54,5 +54,5 @@ class MultidiagonalRow } // namespace Matrices } // namespace TNL -#include <TNL/Matrices/MultidiagonalRow_impl.h> +#include <TNL/Matrices/Legacy/MultidiagonalRow_impl.h> diff --git a/src/TNL/Matrices/MultidiagonalRow_impl.h b/src/TNL/Matrices/Legacy/MultidiagonalRow_impl.h similarity index 100% rename from src/TNL/Matrices/MultidiagonalRow_impl.h rename to src/TNL/Matrices/Legacy/MultidiagonalRow_impl.h diff --git a/src/TNL/Matrices/Multidiagonal_impl.h b/src/TNL/Matrices/Legacy/Multidiagonal_impl.h similarity index 99% rename from src/TNL/Matrices/Multidiagonal_impl.h rename to src/TNL/Matrices/Legacy/Multidiagonal_impl.h index 76f54f748c0744d810518cd9dde5872a894099ad..375e01c6d1cfe1439cf419d524bfba51d9b24b0b 100644 --- a/src/TNL/Matrices/Multidiagonal_impl.h +++ b/src/TNL/Matrices/Legacy/Multidiagonal_impl.h @@ -10,7 +10,7 @@ #pragma once -#include <TNL/Matrices/Multidiagonal.h> +#include <TNL/Matrices/Legacy/Multidiagonal.h> #include <TNL/Containers/Vector.h> #include <TNL/Math.h> #include <TNL/Exceptions/NotImplementedError.h> diff --git a/src/TNL/Matrices/SlicedEllpack.h b/src/TNL/Matrices/Legacy/SlicedEllpack.h similarity index 96% rename from src/TNL/Matrices/SlicedEllpack.h rename to src/TNL/Matrices/Legacy/SlicedEllpack.h index 7176019d2979c57007062e10f02b263047e58157..63b4330871820eb534c99b765da000cee10ba263 100644 --- a/src/TNL/Matrices/SlicedEllpack.h +++ b/src/TNL/Matrices/Legacy/SlicedEllpack.h @@ -21,7 +21,7 @@ #pragma once -#include <TNL/Matrices/Sparse.h> +#include <TNL/Matrices/Legacy/Sparse.h> #include <TNL/Containers/Vector.h> namespace TNL { @@ -66,6 +66,7 @@ public: typedef Index IndexType; typedef typename Sparse< RealType, DeviceType, IndexType >::CompressedRowLengthsVector CompressedRowLengthsVector; typedef typename Sparse< RealType, DeviceType, IndexType >::ConstCompressedRowLengthsVectorView ConstCompressedRowLengthsVectorView; + typedef typename Sparse< RealType, DeviceType, IndexType >::CompressedRowLengthsVectorView CompressedRowLengthsVectorView; typedef typename Sparse< RealType, DeviceType, IndexType >::ValuesVector ValuesVector; typedef typename Sparse< RealType, DeviceType, IndexType >::ColumnIndexesVector ColumnIndexesVector; typedef Sparse< Real, Device, Index > BaseType; @@ -89,6 +90,8 @@ public: void setCompressedRowLengths( ConstCompressedRowLengthsVectorView rowLengths ); + void getCompressedRowLengths( CompressedRowLengthsVectorView rowLengths ) const; + IndexType getRowLength( const IndexType row ) const; __cuda_callable__ @@ -235,4 +238,4 @@ public: } // namespace Matrices } // namespace TNL -#include <TNL/Matrices/SlicedEllpack_impl.h> +#include <TNL/Matrices/Legacy/SlicedEllpack_impl.h> diff --git a/src/TNL/Matrices/SlicedEllpackSymmetric.h b/src/TNL/Matrices/Legacy/SlicedEllpackSymmetric.h similarity index 100% rename from src/TNL/Matrices/SlicedEllpackSymmetric.h rename to src/TNL/Matrices/Legacy/SlicedEllpackSymmetric.h diff --git a/src/TNL/Matrices/SlicedEllpackSymmetricGraph.h b/src/TNL/Matrices/Legacy/SlicedEllpackSymmetricGraph.h similarity index 100% rename from src/TNL/Matrices/SlicedEllpackSymmetricGraph.h rename to src/TNL/Matrices/Legacy/SlicedEllpackSymmetricGraph.h diff --git a/src/TNL/Matrices/SlicedEllpackSymmetricGraph_impl.h 
b/src/TNL/Matrices/Legacy/SlicedEllpackSymmetricGraph_impl.h similarity index 100% rename from src/TNL/Matrices/SlicedEllpackSymmetricGraph_impl.h rename to src/TNL/Matrices/Legacy/SlicedEllpackSymmetricGraph_impl.h diff --git a/src/TNL/Matrices/SlicedEllpackSymmetric_impl.h b/src/TNL/Matrices/Legacy/SlicedEllpackSymmetric_impl.h similarity index 100% rename from src/TNL/Matrices/SlicedEllpackSymmetric_impl.h rename to src/TNL/Matrices/Legacy/SlicedEllpackSymmetric_impl.h diff --git a/src/TNL/Matrices/SlicedEllpack_impl.h b/src/TNL/Matrices/Legacy/SlicedEllpack_impl.h similarity index 98% rename from src/TNL/Matrices/SlicedEllpack_impl.h rename to src/TNL/Matrices/Legacy/SlicedEllpack_impl.h index 8c629b563cfe47f258f44f0705cf7b8b5b6d2435..8673a02c5085f7ac6b793fcf670ddf9c98c2fd87 100644 --- a/src/TNL/Matrices/SlicedEllpack_impl.h +++ b/src/TNL/Matrices/Legacy/SlicedEllpack_impl.h @@ -10,13 +10,13 @@ #pragma once -#include <TNL/Matrices/SlicedEllpack.h> +#include <TNL/Matrices/Legacy/SlicedEllpack.h> #include <TNL/Containers/Vector.h> #include <TNL/Math.h> #include <TNL/Exceptions/NotImplementedError.h> namespace TNL { -namespace Matrices { +namespace Matrices { template< typename Real, typename Device, @@ -83,6 +83,17 @@ void SlicedEllpack< Real, Device, Index, SliceSize >::setCompressedRowLengths( C this->allocateMatrixElements( this->slicePointers.getElement( slices ) ); } +template< typename Real, + typename Device, + typename Index, + int SliceSize > +void SlicedEllpack< Real, Device, Index, SliceSize >::getCompressedRowLengths( CompressedRowLengthsVectorView rowLengths ) const +{ + TNL_ASSERT_EQ( rowLengths.getSize(), this->getRows(), "invalid size of the rowLengths vector" ); + for( IndexType row = 0; row < this->getRows(); row++ ) + rowLengths.setElement( row, this->getRowLength( row ) ); +} + template< typename Real, typename Device, typename Index, diff --git a/src/TNL/Matrices/Sparse.h b/src/TNL/Matrices/Legacy/Sparse.h similarity index 92% rename from src/TNL/Matrices/Sparse.h rename to src/TNL/Matrices/Legacy/Sparse.h index 7dc3798d22fa421655944f6ad6669725fece5e4c..4de00cb2e8b8197bbe186f891a69fc67e18ba7f0 100644 --- a/src/TNL/Matrices/Sparse.h +++ b/src/TNL/Matrices/Legacy/Sparse.h @@ -11,7 +11,7 @@ #pragma once #include <TNL/Matrices/Matrix.h> -#include <TNL/Matrices/SparseRow.h> +#include <TNL/Matrices/Legacy/SparseRow.h> namespace TNL { namespace Matrices { @@ -26,7 +26,7 @@ class Sparse : public Matrix< Real, Device, Index > typedef Real RealType; typedef Device DeviceType; typedef Index IndexType; - typedef typename Matrix< RealType, DeviceType, IndexType >::ValuesVector ValuesVector; + typedef typename Matrix< RealType, DeviceType, IndexType >::ValuesVectorType ValuesVector; typedef Containers::Vector< IndexType, DeviceType, IndexType > ColumnIndexesVector; typedef Matrix< Real, Device, Index > BaseType; typedef SparseRow< RealType, IndexType > MatrixRow; @@ -37,8 +37,6 @@ class Sparse : public Matrix< Real, Device, Index > template< typename Real2, typename Device2, typename Index2 > void setLike( const Sparse< Real2, Device2, Index2 >& matrix ); - IndexType getNumberOfMatrixElements() const; - IndexType getNumberOfNonzeroMatrixElements() const; IndexType getMaxRowLength() const; @@ -66,5 +64,5 @@ class Sparse : public Matrix< Real, Device, Index > } // namespace Matrices } // namespace TNL -#include <TNL/Matrices/Sparse_impl.h> +#include <TNL/Matrices/Legacy/Sparse_impl.h> #include <TNL/Matrices/SparseOperations.h> diff --git a/src/TNL/Matrices/SparseRow.h 
b/src/TNL/Matrices/Legacy/SparseRow.h similarity index 97% rename from src/TNL/Matrices/SparseRow.h rename to src/TNL/Matrices/Legacy/SparseRow.h index f66cd2ceaf1c6f0cd882bb962a78c6649816aa75..4787e638a43f7f2c8f658d75eddd9e5aef9d415f 100644 --- a/src/TNL/Matrices/SparseRow.h +++ b/src/TNL/Matrices/Legacy/SparseRow.h @@ -80,4 +80,4 @@ std::ostream& operator<<( std::ostream& str, const SparseRow< Real, Index >& row } // namespace Matrices } // namespace TNL -#include <TNL/Matrices/SparseRow_impl.h> +#include <TNL/Matrices/Legacy/SparseRow_impl.h> diff --git a/src/TNL/Matrices/SparseRow_impl.h b/src/TNL/Matrices/Legacy/SparseRow_impl.h similarity index 99% rename from src/TNL/Matrices/SparseRow_impl.h rename to src/TNL/Matrices/Legacy/SparseRow_impl.h index 60dfd5034ee36dd01cc8f6cf616fe86dd238c29b..84f8e210e28832e838fb73aae560c9fd60a25930 100644 --- a/src/TNL/Matrices/SparseRow_impl.h +++ b/src/TNL/Matrices/Legacy/SparseRow_impl.h @@ -10,7 +10,7 @@ #pragma once -#include <TNL/Matrices/SparseRow.h> +#include <TNL/Matrices/Legacy/SparseRow.h> #include <TNL/Exceptions/NotImplementedError.h> // Following includes are here to enable usage of std::vector and std::cout. To avoid having to include Device type (HOW would this be done anyway) diff --git a/src/TNL/Matrices/Sparse_impl.h b/src/TNL/Matrices/Legacy/Sparse_impl.h similarity index 92% rename from src/TNL/Matrices/Sparse_impl.h rename to src/TNL/Matrices/Legacy/Sparse_impl.h index d1643db19a48dbf078fe04389e9cb2d061b28a26..3e479441229197750dc384e210b0efefb044e1a3 100644 --- a/src/TNL/Matrices/Sparse_impl.h +++ b/src/TNL/Matrices/Legacy/Sparse_impl.h @@ -33,16 +33,9 @@ template< typename Real, void Sparse< Real, Device, Index >::setLike( const Sparse< Real2, Device2, Index2 >& matrix ) { Matrix< Real, Device, Index >::setLike( matrix ); - this->allocateMatrixElements( matrix.getNumberOfMatrixElements() ); + this->allocateMatrixElements( matrix.getAllocatedElementsCount() ); } -template< typename Real, - typename Device, - typename Index > -Index Sparse< Real, Device, Index >::getNumberOfMatrixElements() const -{ - return this->values.getSize(); -} template< typename Real, typename Device, @@ -82,7 +75,6 @@ template< typename Real, void Sparse< Real, Device, Index >::reset() { Matrix< Real, Device, Index >::reset(); - this->values.reset(); this->columnIndexes.reset(); } diff --git a/src/TNL/Matrices/Matrix.h b/src/TNL/Matrices/Matrix.h index eb29f62c7669af6c32a92414900891accae736f2..129a54cbe0cf47499fa5faa5dab45ad09b50834e 100644 --- a/src/TNL/Matrices/Matrix.h +++ b/src/TNL/Matrices/Matrix.h @@ -11,9 +11,11 @@ #pragma once #include <TNL/Object.h> +#include <TNL/Allocators/Default.h> #include <TNL/Devices/Host.h> #include <TNL/Containers/Vector.h> #include <TNL/Containers/VectorView.h> +#include <TNL/Matrices/MatrixView.h> namespace TNL { /** @@ -23,39 +25,37 @@ namespace Matrices { template< typename Real = double, typename Device = Devices::Host, - typename Index = int > + typename Index = int, + typename RealAllocator = typename Allocators::Default< Device >::template Allocator< Real > > class Matrix : public Object { public: - typedef Real RealType; - typedef Device DeviceType; - typedef Index IndexType; - typedef Containers::Vector< IndexType, DeviceType, IndexType > CompressedRowLengthsVector; - typedef Containers::VectorView< IndexType, DeviceType, IndexType > CompressedRowLengthsVectorView; - typedef typename CompressedRowLengthsVectorView::ConstViewType ConstCompressedRowLengthsVectorView; - typedef Containers::Vector< RealType, 
DeviceType, IndexType > ValuesVector; + using RealType = Real; + using DeviceType = Device; + using IndexType = Index; + using CompressedRowLengthsVector = Containers::Vector< IndexType, DeviceType, IndexType >; + using CompressedRowLengthsVectorView = Containers::VectorView< IndexType, DeviceType, IndexType >; + using ConstCompressedRowLengthsVectorView = typename CompressedRowLengthsVectorView::ConstViewType; + using ValuesVectorType = Containers::Vector< Real, Device, Index, RealAllocator >; + using RealAllocatorType = RealAllocator; + using ViewType = MatrixView< Real, Device, Index >; + using ConstViewType = MatrixView< std::add_const_t< Real >, Device, Index >; - Matrix(); + Matrix( const RealAllocatorType& allocator = RealAllocatorType() ); - virtual void setDimensions( const IndexType rows, - const IndexType columns ); + Matrix( const IndexType rows, + const IndexType columns, + const RealAllocatorType& allocator = RealAllocatorType() ); - virtual void setCompressedRowLengths( ConstCompressedRowLengthsVectorView rowLengths ) = 0; + void setDimensions( const IndexType rows, + const IndexType columns ); - virtual IndexType getRowLength( const IndexType row ) const = 0; + template< typename Matrix_ > + void setLike( const Matrix_& matrix ); - // TODO: implementation is not parallel - // TODO: it would be nice if padding zeros could be stripped - void getCompressedRowLengths( CompressedRowLengthsVector& rowLengths ) const; + IndexType getAllocatedElementsCount() const; - virtual void getCompressedRowLengths( CompressedRowLengthsVectorView rowLengths ) const; - - template< typename Real2, typename Device2, typename Index2 > - void setLike( const Matrix< Real2, Device2, Index2 >& matrix ); - - virtual IndexType getNumberOfMatrixElements() const = 0; - - virtual IndexType getNumberOfNonzeroMatrixElements() const = 0; + IndexType getNumberOfNonzeroMatrixElements() const; void reset(); @@ -65,38 +65,9 @@ public: __cuda_callable__ IndexType getColumns() const; - /**** - * TODO: The fast variants of the following methods cannot be virtual. - * If they were, they could not be used in the CUDA kernels. If CUDA allows it - * in the future and it does not slow down, declare them as virtual here. 
- */ - - virtual bool setElement( const IndexType row, - const IndexType column, - const RealType& value ) = 0; - - virtual bool addElement( const IndexType row, - const IndexType column, - const RealType& value, - const RealType& thisElementMultiplicator = 1.0 ) = 0; - - virtual bool setRow( const IndexType row, - const IndexType* columns, - const RealType* values, - const IndexType numberOfElements ) = 0; - - virtual bool addRow( const IndexType row, - const IndexType* columns, - const RealType* values, - const IndexType numberOfElements, - const RealType& thisElementMultiplicator = 1.0 ) = 0; - - virtual Real getElement( const IndexType row, - const IndexType column ) const = 0; - - const ValuesVector& getValues() const; - - ValuesVector& getValues(); + const ValuesVectorType& getValues() const; + + ValuesVectorType& getValues(); // TODO: parallelize and optimize for sparse matrices template< typename Matrix > @@ -113,28 +84,22 @@ public: // TODO: method for symmetric matrices, should not be in general Matrix interface + [[deprecated]] __cuda_callable__ const IndexType& getNumberOfColors() const; // TODO: method for symmetric matrices, should not be in general Matrix interface + [[deprecated]] void computeColorsVector(Containers::Vector<Index, Device, Index> &colorsVector); - // TODO: what is this supposed to do?!? There are redefinitions only in the - // EllpackSymmetricGraph and SlicedEllpackSymmetricGraph classes... - bool help( bool verbose = false ) { return true;}; - - // TODO: copy should be done in the operator= and it should work the other way too - void copyFromHostToCuda( Matrices::Matrix< Real, Devices::Host, Index >& matrix ); - - // TODO: missing implementation! - __cuda_callable__ - Index getValuesSize() const; - protected: - IndexType rows, columns, numberOfColors; + IndexType rows, columns; - ValuesVector values; + // TODO: remove + IndexType numberOfColors; + + ValuesVectorType values; }; template< typename Real, typename Device, typename Index > @@ -144,14 +109,7 @@ std::ostream& operator << ( std::ostream& str, const Matrix< Real, Device, Index return str; } -template< typename Matrix, - typename InVector, - typename OutVector > -void MatrixVectorProductCuda( const Matrix& matrix, - const InVector& inVector, - OutVector& outVector ); - } // namespace Matrices } // namespace TNL -#include <TNL/Matrices/Matrix_impl.h> +#include <TNL/Matrices/Matrix.hpp> diff --git a/src/TNL/Matrices/Matrix.hpp b/src/TNL/Matrices/Matrix.hpp new file mode 100644 index 0000000000000000000000000000000000000000..ce5f52274ec1134f30a52b64bf1572b7d757dc84 --- /dev/null +++ b/src/TNL/Matrices/Matrix.hpp @@ -0,0 +1,254 @@ +/*************************************************************************** + Matrix_impl.h - description + ------------------- + begin : Dec 18, 2013 + copyright : (C) 2013 by Tomas Oberhuber + email : tomas.oberhuber@fjfi.cvut.cz + ***************************************************************************/ + +/* See Copyright Notice in tnl/Copyright */ + +#pragma once + +#include <TNL/Matrices/Matrix.h> +#include <TNL/Assert.h> +#include <TNL/Cuda/LaunchHelpers.h> +#include <TNL/Cuda/MemoryHelpers.h> +#include <TNL/Cuda/SharedMemory.h> + +namespace TNL { +namespace Matrices { + +template< typename Real, + typename Device, + typename Index, + typename RealAllocator > +Matrix< Real, Device, Index, RealAllocator >:: +Matrix( const RealAllocatorType& allocator ) +: rows( 0 ), + columns( 0 ), + values( allocator ) +{ +} + +template< typename Real, + typename Device, + typename 
Index, + typename RealAllocator > +Matrix< Real, Device, Index, RealAllocator >:: +Matrix( const IndexType rows_, const IndexType columns_, const RealAllocatorType& allocator ) +: rows( rows_ ), + columns( columns_ ), + values( allocator ) +{ +} + +template< typename Real, + typename Device, + typename Index, + typename RealAllocator > +void Matrix< Real, Device, Index, RealAllocator >::setDimensions( const IndexType rows, + const IndexType columns ) +{ + TNL_ASSERT( rows > 0 && columns > 0, + std::cerr << " rows = " << rows << " columns = " << columns ); + this->rows = rows; + this->columns = columns; +} + +template< typename Real, + typename Device, + typename Index, + typename RealAllocator > + template< typename Matrix_ > +void Matrix< Real, Device, Index, RealAllocator >::setLike( const Matrix_& matrix ) +{ + setDimensions( matrix.getRows(), matrix.getColumns() ); +} + +template< typename Real, + typename Device, + typename Index, + typename RealAllocator > +Index Matrix< Real, Device, Index, RealAllocator >::getAllocatedElementsCount() const +{ + return this->values.getSize(); +} + +template< typename Real, + typename Device, + typename Index, + typename RealAllocator > +Index Matrix< Real, Device, Index, RealAllocator >::getNumberOfNonzeroMatrixElements() const +{ + const auto values_view = this->values.getConstView(); + auto fetch = [=] __cuda_callable__ ( const IndexType i ) -> IndexType { + return ( values_view[ i ] != 0.0 ); + }; + return Algorithms::Reduction< DeviceType >::reduce( this->values.getSize(), std::plus<>{}, fetch, 0 ); +} + +template< typename Real, + typename Device, + typename Index, + typename RealAllocator > +__cuda_callable__ +Index Matrix< Real, Device, Index, RealAllocator >::getRows() const +{ + return this->rows; +} + +template< typename Real, + typename Device, + typename Index, + typename RealAllocator > +__cuda_callable__ +Index Matrix< Real, Device, Index, RealAllocator >::getColumns() const +{ + return this->columns; +} + +template< typename Real, + typename Device, + typename Index, + typename RealAllocator > +const typename Matrix< Real, Device, Index, RealAllocator >::ValuesVectorType& +Matrix< Real, Device, Index, RealAllocator >:: +getValues() const +{ + return this->values; +} + +template< typename Real, + typename Device, + typename Index, + typename RealAllocator > +typename Matrix< Real, Device, Index, RealAllocator >::ValuesVectorType& +Matrix< Real, Device, Index, RealAllocator >:: +getValues() +{ + return this->values; +} + +template< typename Real, + typename Device, + typename Index, + typename RealAllocator > +void Matrix< Real, Device, Index, RealAllocator >::reset() +{ + this->rows = 0; + this->columns = 0; + this->values.reset(); +} + +template< typename Real, + typename Device, + typename Index, + typename RealAllocator > + template< typename MatrixT > +bool Matrix< Real, Device, Index, RealAllocator >::operator == ( const MatrixT& matrix ) const +{ + if( this->getRows() != matrix.getRows() || + this->getColumns() != matrix.getColumns() ) + return false; + for( IndexType row = 0; row < this->getRows(); row++ ) + for( IndexType column = 0; column < this->getColumns(); column++ ) + if( this->getElement( row, column ) != matrix.getElement( row, column ) ) + return false; + return true; +} + +template< typename Real, + typename Device, + typename Index, + typename RealAllocator > + template< typename MatrixT > +bool Matrix< Real, Device, Index, RealAllocator >::operator != ( const MatrixT& matrix ) const +{ + return ! 
operator == ( matrix ); +} + +template< typename Real, + typename Device, + typename Index, + typename RealAllocator > +void Matrix< Real, Device, Index, RealAllocator >::save( File& file ) const +{ + Object::save( file ); + file.save( &this->rows ); + file.save( &this->columns ); + file << this->values; +} + +template< typename Real, + typename Device, + typename Index, + typename RealAllocator > +void Matrix< Real, Device, Index, RealAllocator >::load( File& file ) +{ + Object::load( file ); + file.load( &this->rows ); + file.load( &this->columns ); + file >> this->values; +} + +template< typename Real, + typename Device, + typename Index, + typename RealAllocator > +void Matrix< Real, Device, Index, RealAllocator >::print( std::ostream& str ) const +{ +} + +template< typename Real, + typename Device, + typename Index, + typename RealAllocator > +__cuda_callable__ +const Index& +Matrix< Real, Device, Index, RealAllocator >:: +getNumberOfColors() const +{ + return this->numberOfColors; +} + +template< typename Real, + typename Device, + typename Index, + typename RealAllocator > +void +Matrix< Real, Device, Index, RealAllocator >:: +computeColorsVector(Containers::Vector<Index, Device, Index> &colorsVector) +{ + for( IndexType i = this->getRows() - 1; i >= 0; i-- ) + { + // init color array + Containers::Vector< Index, Device, Index > usedColors; + usedColors.setSize( this->numberOfColors ); + for( IndexType j = 0; j < this->numberOfColors; j++ ) + usedColors.setElement( j, 0 ); + + // find all colors used in given row + for( IndexType j = i + 1; j < this->getColumns(); j++ ) + if( this->getElement( i, j ) != 0.0 ) + usedColors.setElement( colorsVector.getElement( j ), 1 ); + + // find unused color + bool found = false; + for( IndexType j = 0; j < this->numberOfColors; j++ ) + if( usedColors.getElement( j ) == 0 ) + { + colorsVector.setElement( i, j ); + found = true; + break; + } + if( !found ) + { + colorsVector.setElement( i, this->numberOfColors ); + this->numberOfColors++; + } + } +} + +} // namespace Matrices +} // namespace TNL diff --git a/src/TNL/Matrices/MatrixInfo.h b/src/TNL/Matrices/MatrixInfo.h new file mode 100644 index 0000000000000000000000000000000000000000..75cac40557058db2c41b06ca334787e8eb5f356f --- /dev/null +++ b/src/TNL/Matrices/MatrixInfo.h @@ -0,0 +1,123 @@ +/*************************************************************************** + Matrix.h - description + ------------------- + begin : Dec 18, 2013 + copyright : (C) 2013 by Tomas Oberhuber + email : tomas.oberhuber@fjfi.cvut.cz + ***************************************************************************/ + +/* See Copyright Notice in tnl/Copyright */ + +#pragma once + +#include <TNL/String.h> +#include <TNL/Matrices/Dense.h> +#include <TNL/Matrices/DenseMatrixView.h> +#include <TNL/Matrices/SparseMatrix.h> +#include <TNL/Matrices/SparseMatrixView.h> +#include <TNL/Containers/Segments/CSRView.h> +#include <TNL/Containers/Segments/EllpackView.h> +#include <TNL/Containers/Segments/SlicedEllpackView.h> +#include <TNL/Matrices/Legacy/CSR.h> +#include <TNL/Matrices/Legacy/Ellpack.h> +#include <TNL/Matrices/Legacy/SlicedEllpack.h> +#include <TNL/Matrices/Legacy/ChunkedEllpack.h> +#include <TNL/Matrices/Legacy/BiEllpack.h> + +namespace TNL { +/** + * \brief Namespace for matrix formats. 
+ */ +namespace Matrices { + +template< typename Matrix > +struct MatrixInfo +{}; + +template< typename Real, + typename Device, + typename Index, + bool RowMajorOrder > +struct MatrixInfo< DenseMatrixView< Real, Device, Index, RowMajorOrder > > +{ + static String getDensity() { return String( "dense" ); }; +}; + +template< typename Real, + typename Device, + typename Index, + bool RowMajorOrder, + typename RealAllocator > +struct MatrixInfo< Dense< Real, Device, Index, RowMajorOrder, RealAllocator > > +: public MatrixInfo< typename Dense< Real, Device, Index, RowMajorOrder, RealAllocator >::ViewType > +{ +}; + +template< typename Real, + typename Device, + typename Index, + typename MatrixType, + template< typename Device_, typename Index_ > class SegmentsView > +struct MatrixInfo< SparseMatrixView< Real, Device, Index, MatrixType, SegmentsView > > +{ + static String getDensity() { return String( "sparse" ); }; + + static String getFormat() { return SegmentsView< Device, Index >::getSegmentsType(); }; +}; + +template< typename Real, + typename Device, + typename Index, + typename MatrixType, + template< typename Device_, typename Index_, typename IndexAllocator_ > class Segments, + typename RealAllocator, + typename IndexAllocator > +struct MatrixInfo< SparseMatrix< Real, Device, Index, MatrixType, Segments, RealAllocator, IndexAllocator > > +: public MatrixInfo< typename SparseMatrix< Real, Device, Index, MatrixType, Segments, RealAllocator, IndexAllocator >::ViewType > +{ +}; + +///// +// Legacy matrices +template< typename Real, typename Device, typename Index > +struct MatrixInfo< BiEllpack< Real, Device, Index > > +{ + static String getDensity() { return String( "sparse" ); }; + + static String getFormat() { return "BiEllpack Legacy"; }; +}; + +template< typename Real, typename Device, typename Index > +struct MatrixInfo< CSR< Real, Device, Index > > +{ + static String getDensity() { return String( "sparse" ); }; + + static String getFormat() { return "CSR Legacy"; }; +}; + +template< typename Real, typename Device, typename Index > +struct MatrixInfo< ChunkedEllpack< Real, Device, Index > > +{ + static String getDensity() { return String( "sparse" ); }; + + static String getFormat() { return "ChunkedEllpack Legacy"; }; +}; + +template< typename Real, typename Device, typename Index > +struct MatrixInfo< Ellpack< Real, Device, Index > > +{ + static String getDensity() { return String( "sparse" ); }; + + static String getFormat() { return "Ellpack Legacy"; }; +}; + +template< typename Real, typename Device, typename Index, int SliceSize > +struct MatrixInfo< SlicedEllpack< Real, Device, Index, SliceSize> > +{ + static String getDensity() { return String( "sparse" ); }; + + static String getFormat() { return "SlicedEllpack Legacy"; }; +}; + +} //namespace Matrices +} //namespace TNL diff --git a/src/TNL/Matrices/MatrixReader.h b/src/TNL/Matrices/MatrixReader.h index aaf75a373fc8552db255f412868897497f7d3744..ae0606678f1b9167b10fd4b9e4868847c41c9b99 100644 --- a/src/TNL/Matrices/MatrixReader.h +++ b/src/TNL/Matrices/MatrixReader.h @@ -15,7 +15,7 @@ #include <TNL/Containers/Vector.h> namespace TNL { -namespace Matrices { +namespace Matrices { template< typename Device > class MatrixReaderDeviceDependentCode @@ -30,24 +30,24 @@ class MatrixReader typedef typename Matrix::DeviceType DeviceType; typedef typename Matrix::RealType RealType; - static bool readMtxFile( const String& fileName, + static void readMtxFile( const String& fileName, Matrix& matrix, bool verbose = false, bool symReader 
= false ); - static bool readMtxFile( std::istream& file, + static void readMtxFile( std::istream& file, Matrix& matrix, bool verbose = false, bool symReader = false ); - static bool readMtxFileHostMatrix( std::istream& file, + static void readMtxFileHostMatrix( std::istream& file, Matrix& matrix, typename Matrix::CompressedRowLengthsVector& rowLengths, bool verbose, bool symReader ); - static bool verifyMtxFile( std::istream& file, + static void verifyMtxFile( std::istream& file, const Matrix& matrix, bool verbose = false ); @@ -61,13 +61,13 @@ class MatrixReader static bool checkMtxHeader( const String& header, bool& symmetric ); - static bool readMtxHeader( std::istream& file, + static void readMtxHeader( std::istream& file, IndexType& rows, IndexType& columns, bool& symmetricMatrix, bool verbose ); - static bool computeCompressedRowLengthsFromMtxFile( std::istream& file, + static void computeCompressedRowLengthsFromMtxFile( std::istream& file, Containers::Vector< int, DeviceType, int >& rowLengths, const int columns, const int rows, @@ -75,13 +75,13 @@ class MatrixReader bool verbose, bool symReader = false ); - static bool readMatrixElementsFromMtxFile( std::istream& file, + static void readMatrixElementsFromMtxFile( std::istream& file, Matrix& matrix, bool symmetricMatrix, bool verbose, bool symReader ); - static bool parseMtxLineWithElement( const String& line, + static void parseMtxLineWithElement( const String& line, IndexType& row, IndexType& column, RealType& value ); diff --git a/src/TNL/Matrices/MatrixReader_impl.h b/src/TNL/Matrices/MatrixReader_impl.h index d00fdb9044f18fee40ea339cc29fe534874d6027..df2c05c63237c23e0fa26ba331ac8a3d2da03b8c 100644 --- a/src/TNL/Matrices/MatrixReader_impl.h +++ b/src/TNL/Matrices/MatrixReader_impl.h @@ -11,6 +11,7 @@ #pragma once #include <iomanip> +#include <sstream> #include <TNL/String.h> #include <TNL/Containers/Vector.h> #include <TNL/Timer.h> @@ -20,7 +21,7 @@ namespace TNL { namespace Matrices { template< typename Matrix > -bool MatrixReader< Matrix >::readMtxFile( const String& fileName, +void MatrixReader< Matrix >::readMtxFile( const String& fileName, Matrix& matrix, bool verbose, bool symReader ) @@ -28,24 +29,21 @@ bool MatrixReader< Matrix >::readMtxFile( const String& fileName, std::fstream file; file.open( fileName.getString(), std::ios::in ); if( ! file ) - { - std::cerr << "I am not able to open the file " << fileName << "." << std::endl; - return false; - } - return readMtxFile( file, matrix, verbose, symReader ); + throw std::runtime_error( std::string( "I am not able to open the file " ) + fileName.getString() ); + readMtxFile( file, matrix, verbose, symReader ); } template< typename Matrix > -bool MatrixReader< Matrix >::readMtxFile( std::istream& file, +void MatrixReader< Matrix >::readMtxFile( std::istream& file, Matrix& matrix, bool verbose, bool symReader ) { - return MatrixReaderDeviceDependentCode< typename Matrix::DeviceType >::readMtxFile( file, matrix, verbose, symReader ); + MatrixReaderDeviceDependentCode< typename Matrix::DeviceType >::readMtxFile( file, matrix, verbose, symReader ); } template< typename Matrix > -bool MatrixReader< Matrix >::readMtxFileHostMatrix( std::istream& file, +void MatrixReader< Matrix >::readMtxFileHostMatrix( std::istream& file, Matrix& matrix, typename Matrix::CompressedRowLengthsVector& rowLengths, bool verbose, @@ -54,37 +52,29 @@ bool MatrixReader< Matrix >::readMtxFileHostMatrix( std::istream& file, IndexType rows, columns; bool symmetricMatrix( false ); - if( ! 
readMtxHeader( file, rows, columns, symmetricMatrix, verbose ) ) - return false; + readMtxHeader( file, rows, columns, symmetricMatrix, verbose ); if( symReader && !symmetricMatrix ) - { - std::cout << "Matrix is not symmetric, but flag for symmetric matrix is given. Aborting." << std::endl; - return false; - } + throw std::runtime_error( "Matrix is not symmetric, but flag for symmetric matrix is given. Aborting." ); matrix.setDimensions( rows, columns ); rowLengths.setSize( rows ); - if( ! computeCompressedRowLengthsFromMtxFile( file, rowLengths, columns, rows, symmetricMatrix, verbose ) ) - return false; + computeCompressedRowLengthsFromMtxFile( file, rowLengths, columns, rows, symmetricMatrix, verbose ); matrix.setCompressedRowLengths( rowLengths ); - if( ! readMatrixElementsFromMtxFile( file, matrix, symmetricMatrix, verbose, symReader ) ) - return false; - return true; + readMatrixElementsFromMtxFile( file, matrix, symmetricMatrix, verbose, symReader ); } template< typename Matrix > -bool MatrixReader< Matrix >::verifyMtxFile( std::istream& file, +void MatrixReader< Matrix >::verifyMtxFile( std::istream& file, const Matrix& matrix, bool verbose ) { bool symmetricMatrix( false ); IndexType rows, columns; - if( ! readMtxHeader( file, rows, columns, symmetricMatrix, false ) ) - return false; + readMtxHeader( file, rows, columns, symmetricMatrix, false ); file.clear(); file.seekg( 0, std::ios::beg ); String line; @@ -102,16 +92,16 @@ bool MatrixReader< Matrix >::verifyMtxFile( std::istream& file, } IndexType row( 1 ), column( 1 ); RealType value; - if( ! parseMtxLineWithElement( line, row, column, value ) ) - return false; + parseMtxLineWithElement( line, row, column, value ); if( value != matrix.getElement( row-1, column-1 ) || ( symmetricMatrix && value != matrix.getElement( column-1, row-1 ) ) ) { - std::cerr << "*** !!! VERIFICATION ERROR !!! *** " << std::endl - << "The elements differ at " << row-1 << " row " << column-1 << " column." << std::endl - << "The matrix value is " << matrix.getElement( row-1, column-1 ) - << " while the file value is " << value << "." << std::endl; - return false; + std::stringstream str; + str << "*** !!! VERIFICATION ERROR !!! *** " << std::endl + << "The elements differ at " << row-1 << " row " << column-1 << " column." << std::endl + << "The matrix value is " << matrix.getElement( row-1, column-1 ) + << " while the file value is " << value << "." << std::endl; + throw std::runtime_error( str.str() ); } processedElements++; if( symmetricMatrix && row != column ) @@ -126,7 +116,6 @@ bool MatrixReader< Matrix >::verifyMtxFile( std::istream& file, std::cout << " Verifying the matrix elements ... " << processedElements << " / " << matrix.getNumberOfMatrixElements() << " -> " << timer.getRealTime() << " sec. i.e. " << fileSize / ( timer.getRealTime() * ( 1 << 20 )) << "MB/s." << std::endl; - return true; } template< typename Matrix > @@ -152,8 +141,7 @@ bool MatrixReader< Matrix >::findLineByElement( std::istream& file, } IndexType currentRow( 1 ), currentColumn( 1 ); RealType value; - if( ! 
parseMtxLineWithElement( line, currentRow, currentColumn, value ) ) - return false; + parseMtxLineWithElement( line, currentRow, currentColumn, value ); if( ( currentRow == row + 1 && currentColumn == column + 1 ) || ( symmetricMatrix && currentRow == column + 1 && currentColumn == row + 1 ) ) return true; @@ -166,41 +154,27 @@ bool MatrixReader< Matrix >::checkMtxHeader( const String& header, bool& symmetric ) { std::vector< String > parsedLine = header.split( ' ', String::SplitSkip::SkipEmpty ); - if( (int) parsedLine.size() < 5 ) - return false; - if( parsedLine[ 0 ] != "%%MatrixMarket" ) + if( (int) parsedLine.size() < 5 || parsedLine[ 0 ] != "%%MatrixMarket" ) return false; if( parsedLine[ 1 ] != "matrix" ) - { - std::cerr << "Error: 'matrix' expected in the header line (" << header << ")." << std::endl; - return false; - } + throw std::runtime_error( std::string( "Keyword 'matrix' is expected in the header line: " ) + header.getString() ); if( parsedLine[ 2 ] != "coordinates" && parsedLine[ 2 ] != "coordinate" ) - { - std::cerr << "Error: Only 'coordinates' format is supported now, not " << parsedLine[ 2 ] << "." << std::endl; - return false; - } + throw std::runtime_error( std::string( "Error: Only 'coordinates' format is supported now, not " ) + parsedLine[ 2 ].getString() ); if( parsedLine[ 3 ] != "real" ) - { - std::cerr << "Error: Only 'real' matrices are supported, not " << parsedLine[ 3 ] << "." << std::endl; - return false; - } + throw std::runtime_error( std::string( "Only 'real' matrices are supported, not " ) + parsedLine[ 3 ].getString() ); if( parsedLine[ 4 ] != "general" ) { if( parsedLine[ 4 ] == "symmetric" ) symmetric = true; else - { - std::cerr << "Error: Only 'general' matrices are supported, not " << parsedLine[ 4 ] << "." << std::endl; - return false; - } + throw std::runtime_error( std::string( "Only 'general' matrices are supported, not " ) + parsedLine[ 4 ].getString() ); } return true; } template< typename Matrix > -bool MatrixReader< Matrix >::readMtxHeader( std::istream& file, +void MatrixReader< Matrix >::readMtxHeader( std::istream& file, IndexType& rows, IndexType& columns, bool& symmetric, @@ -217,26 +191,17 @@ bool MatrixReader< Matrix >::readMtxHeader( std::istream& file, if( ! headerParsed ) { headerParsed = checkMtxHeader( line, symmetric ); - if( ! headerParsed ) - return false; if( verbose && symmetric ) std::cout << "The matrix is SYMMETRIC ... "; continue; } if( line[ 0 ] == '%' ) continue; if( ! headerParsed ) - { - std::cerr << "Unknown format of the file. We expect line like this:" << std::endl; - std::cerr << "%%MatrixMarket matrix coordinate real general" << std::endl; - return false; - } + throw std::runtime_error( "Unknown format of the file. We expect line like this: %%MatrixMarket matrix coordinate real general" ); parsedLine = line.split( ' ', String::SplitSkip::SkipEmpty ); if( (int) parsedLine.size() != 3 ) - { - std::cerr << "Wrong number of parameters in the matrix header." << std::endl; - return false; - } + throw std::runtime_error( "Wrong number of parameters in the matrix header - should be 3." ); rows = atoi( parsedLine[ 0 ].getString() ); columns = atoi( parsedLine[ 1 ].getString() ); if( verbose ) @@ -244,16 +209,13 @@ bool MatrixReader< Matrix >::readMtxHeader( std::istream& file, << " rows and " << columns << " columns. " << std::endl; if( rows <= 0 || columns <= 0 ) - { - std::cerr << "Wrong parameters in the matrix header." 
<< std::endl; - return false; - } - return true; + throw std::runtime_error( "Row or column index is negative." ); + break; } } template< typename Matrix > -bool MatrixReader< Matrix >::computeCompressedRowLengthsFromMtxFile( std::istream& file, +void MatrixReader< Matrix >::computeCompressedRowLengthsFromMtxFile( std::istream& file, Containers::Vector< int, DeviceType, int >& rowLengths, const int columns, const int rows, @@ -279,13 +241,13 @@ bool MatrixReader< Matrix >::computeCompressedRowLengthsFromMtxFile( std::istrea } IndexType row( 1 ), column( 1 ); RealType value; - if( ! parseMtxLineWithElement( line, row, column, value ) ) - return false; + parseMtxLineWithElement( line, row, column, value ); numberOfElements++; if( column > columns || row > rows ) { - std::cerr << "There is an element at position " << row << ", " << column << " out of the matrix dimensions " << rows << " x " << columns << "." << std::endl; - return false; + std::stringstream str; + str << "There is an element at position " << row << ", " << column << " out of the matrix dimensions " << rows << " x " << columns << "."; + throw std::runtime_error( str.str() ); } if( verbose ) std::cout << " Counting the matrix elements ... " << numberOfElements / 1000 << " thousands \r" << std::flush; @@ -298,23 +260,23 @@ bool MatrixReader< Matrix >::computeCompressedRowLengthsFromMtxFile( std::istrea if( rowLengths[ row - 1 ] > columns ) { - std::cerr << "There are more elements ( " << rowLengths[ row - 1 ] << " ) than the matrix columns ( " << columns << " ) at the row " << row << "." << std::endl; - return false; + std::stringstream str; + str << "There are more elements ( " << rowLengths[ row - 1 ] << " ) than the matrix columns ( " << columns << " ) at the row " << row << "."; + throw std::runtime_error( str.str() ); } if( symmetricMatrix && row != column && symReader ) { rowLengths[ column - 1 ]++; if( rowLengths[ column - 1 ] > columns ) { - std::cerr << "There are more elements ( " << rowLengths[ row - 1 ] << " ) than the matrix columns ( " << columns << " ) at the row " << column << " ." << std::endl; - return false; + std::stringstream str; + str << "There are more elements ( " << rowLengths[ row - 1 ] << " ) than the matrix columns ( " << columns << " ) at the row " << column << " ."; + throw std::runtime_error( str.str() ); } continue; } else if( symmetricMatrix && row != column && !symReader ) - { rowLengths[ column - 1 ]++; - } } file.clear(); long int fileSize = file.tellg(); @@ -323,11 +285,10 @@ bool MatrixReader< Matrix >::computeCompressedRowLengthsFromMtxFile( std::istrea std::cout << " Counting the matrix elements ... " << numberOfElements / 1000 << " thousands -> " << timer.getRealTime() << " sec. i.e. " << fileSize / ( timer.getRealTime() * ( 1 << 20 )) << "MB/s." << std::endl; - return true; } template< typename Matrix > -bool MatrixReader< Matrix >::readMatrixElementsFromMtxFile( std::istream& file, +void MatrixReader< Matrix >::readMatrixElementsFromMtxFile( std::istream& file, Matrix& matrix, bool symmetricMatrix, bool verbose, @@ -351,8 +312,7 @@ bool MatrixReader< Matrix >::readMatrixElementsFromMtxFile( std::istream& file, } IndexType row( 1 ), column( 1 ); RealType value; - if( ! 
parseMtxLineWithElement( line, row, column, value ) ) - return false; + parseMtxLineWithElement( line, row, column, value ); if( !symReader || ( symReader && row >= column ) ) @@ -362,9 +322,7 @@ bool MatrixReader< Matrix >::readMatrixElementsFromMtxFile( std::istream& file, processedElements++; if( symmetricMatrix && row != column && symReader ) - { continue; - } else if( symmetricMatrix && row != column && !symReader ) { matrix.setElement( column - 1, row - 1, value ); @@ -376,15 +334,13 @@ bool MatrixReader< Matrix >::readMatrixElementsFromMtxFile( std::istream& file, long int fileSize = file.tellg(); timer.stop(); if( verbose ) - std::cout << " Reading the matrix elements ... " << processedElements << " / " << matrix.getNumberOfMatrixElements() + std::cout << " Reading the matrix elements ... " << processedElements << " / " << matrix.getAllocatedElementsCount() << " -> " << timer.getRealTime() << " sec. i.e. " << fileSize / ( timer.getRealTime() * ( 1 << 20 )) << "MB/s." << std::endl; - - return true; } template< typename Matrix > -bool MatrixReader< Matrix >::parseMtxLineWithElement( const String& line, +void MatrixReader< Matrix >::parseMtxLineWithElement( const String& line, IndexType& row, IndexType& column, RealType& value ) @@ -392,13 +348,13 @@ bool MatrixReader< Matrix >::parseMtxLineWithElement( const String& line, std::vector< String > parsedLine = line.split( ' ', String::SplitSkip::SkipEmpty ); if( (int) parsedLine.size() != 3 ) { - std::cerr << "Wrong number of parameters in the matrix row at line:" << line << std::endl; - return false; + std::stringstream str; + str << "Wrong number of parameters in the matrix row at line:" << line; + throw std::runtime_error( str.str() ); } row = atoi( parsedLine[ 0 ].getString() ); column = atoi( parsedLine[ 1 ].getString() ); value = ( RealType ) atof( parsedLine[ 2 ].getString() ); - return true; } template<> @@ -407,13 +363,13 @@ class MatrixReaderDeviceDependentCode< Devices::Host > public: template< typename Matrix > - static bool readMtxFile( std::istream& file, + static void readMtxFile( std::istream& file, Matrix& matrix, bool verbose, bool symReader ) { typename Matrix::CompressedRowLengthsVector rowLengths; - return MatrixReader< Matrix >::readMtxFileHostMatrix( file, matrix, rowLengths, verbose, symReader ); + MatrixReader< Matrix >::readMtxFileHostMatrix( file, matrix, rowLengths, verbose, symReader ); } }; @@ -423,7 +379,7 @@ class MatrixReaderDeviceDependentCode< Devices::Cuda > public: template< typename Matrix > - static bool readMtxFile( std::istream& file, + static void readMtxFile( std::istream& file, Matrix& matrix, bool verbose, bool symReader ) @@ -433,10 +389,7 @@ class MatrixReaderDeviceDependentCode< Devices::Cuda > HostMatrixType hostMatrix; CompressedRowLengthsVector rowLengths; - return MatrixReader< Matrix >::readMtxFileHostMatrix( file, matrix, rowLengths, verbose, symReader ); - - matrix = hostMatrix; - return true; + MatrixReader< Matrix >::readMtxFileHostMatrix( file, matrix, rowLengths, verbose, symReader ); } }; diff --git a/src/TNL/Matrices/MatrixType.h b/src/TNL/Matrices/MatrixType.h new file mode 100644 index 0000000000000000000000000000000000000000..8d4cfe7ba4d592ca117005127660f800df287906 --- /dev/null +++ b/src/TNL/Matrices/MatrixType.h @@ -0,0 +1,63 @@ +/*************************************************************************** + MatrixType.h - description + ------------------- + begin : Dec 28, 2019 + copyright : (C) 2019 by Tomas Oberhuber + email : tomas.oberhuber@fjfi.cvut.cz + 
***************************************************************************/
+
+/* See Copyright Notice in tnl/Copyright */
+
+#pragma once
+
+namespace TNL {
+ namespace Matrices {
+
+template< bool Symmetric,
+ bool Binary >
+struct MatrixType
+{
+ static constexpr bool isSymmetric() { return Symmetric; }
+
+ static constexpr bool isBinary() { return Binary; }
+
+};
+
+struct GeneralMatrix
+{
+ static constexpr bool isSymmetric() { return false; }
+
+ static constexpr bool isBinary() { return false; }
+};
+
+struct SymmetricMatrix
+{
+ static constexpr bool isSymmetric() { return true; }
+
+ static constexpr bool isBinary() { return false; }
+};
+
+struct BinaryMatrix
+{
+ static constexpr bool isSymmetric() { return false; }
+
+ static constexpr bool isBinary() { return true; }
+};
+
+struct BinarySymmetricMatrix
+{
+ static constexpr bool isSymmetric() { return true; }
+
+ static constexpr bool isBinary() { return true; }
+};
+
+struct SymmetricBinaryMatrix
+{
+ static constexpr bool isSymmetric() { return true; }
+
+ static constexpr bool isBinary() { return true; }
+};
+
+
+ } //namespace Matrices
+} //namespace TNL
\ No newline at end of file
diff --git a/src/TNL/Matrices/MatrixView.h b/src/TNL/Matrices/MatrixView.h
new file mode 100644
index 0000000000000000000000000000000000000000..89551018186e279ced32005971ccfe44d7bda918
--- /dev/null
+++ b/src/TNL/Matrices/MatrixView.h
@@ -0,0 +1,117 @@
+/***************************************************************************
+ MatrixView.h - description
+ -------------------
+ begin : Dec 28, 2019
+ copyright : (C) 2019 by Tomas Oberhuber
+ email : tomas.oberhuber@fjfi.cvut.cz
+ ***************************************************************************/
+
+/* See Copyright Notice in tnl/Copyright */
+
+#pragma once
+
+#include <TNL/Object.h>
+#include <TNL/Allocators/Default.h>
+#include <TNL/Devices/Host.h>
+#include <TNL/Containers/Vector.h>
+#include <TNL/Containers/VectorView.h>
+
+namespace TNL {
+/**
+ * \brief Namespace for matrix formats.
+ */
+namespace Matrices {
+
+template< typename Real = double,
+ typename Device = Devices::Host,
+ typename Index = int >
+class MatrixView : public Object
+{
+public:
+ using RealType = Real;
+ using DeviceType = Device;
+ using IndexType = Index;
+ using CompressedRowLengthsVector = Containers::Vector< IndexType, DeviceType, IndexType >;
+ using CompressedRowLengthsVectorView = Containers::VectorView< IndexType, DeviceType, IndexType >;
+ using ConstCompressedRowLengthsVectorView = typename CompressedRowLengthsVectorView::ConstViewType;
+ using ValuesView = Containers::VectorView< RealType, DeviceType, IndexType >;
+ using ViewType = MatrixView< typename std::remove_const< Real >::type, Device, Index >;
+ using ConstViewType = MatrixView< typename std::add_const< Real >::type, Device, Index >;
+
+ __cuda_callable__
+ MatrixView();
+
+ __cuda_callable__
+ MatrixView( const IndexType rows,
+ const IndexType columns,
+ const ValuesView& values );
+
+ __cuda_callable__
+ MatrixView( const MatrixView& view ) = default;
+
+ IndexType getAllocatedElementsCount() const;
+
+ virtual IndexType getNumberOfNonzeroMatrixElements() const;
+
+ __cuda_callable__
+ IndexType getRows() const;
+
+ __cuda_callable__
+ IndexType getColumns() const;
+
+ __cuda_callable__
+ const ValuesView& getValues() const;
+
+ __cuda_callable__
+ ValuesView& getValues();
+
+ /**
+ * \brief Shallow copy of the matrix view.
+ * + * @param view + * @return + */ + __cuda_callable__ + MatrixView& operator=( const MatrixView& view ); + + // TODO: parallelize and optimize for sparse matrices + template< typename Matrix > + bool operator == ( const Matrix& matrix ) const; + + template< typename Matrix > + bool operator != ( const Matrix& matrix ) const; + + virtual void save( File& file ) const; + + virtual void load( File& file ); + + virtual void print( std::ostream& str ) const; + + + // TODO: method for symmetric matrices, should not be in general Matrix interface + [[deprecated]] + __cuda_callable__ + const IndexType& getNumberOfColors() const; + + // TODO: method for symmetric matrices, should not be in general Matrix interface + [[deprecated]] + void computeColorsVector(Containers::Vector<Index, Device, Index> &colorsVector); + + protected: + + IndexType rows, columns; + + ValuesView values; +}; + +template< typename Real, typename Device, typename Index > +std::ostream& operator << ( std::ostream& str, const MatrixView< Real, Device, Index >& m ) +{ + m.print( str ); + return str; +} + +} // namespace Matrices +} // namespace TNL + +#include <TNL/Matrices/MatrixView.hpp> diff --git a/src/TNL/Matrices/MatrixView.hpp b/src/TNL/Matrices/MatrixView.hpp new file mode 100644 index 0000000000000000000000000000000000000000..dfac8f3afc671db39382fff6cc1916e73f3fc4b0 --- /dev/null +++ b/src/TNL/Matrices/MatrixView.hpp @@ -0,0 +1,225 @@ +/*************************************************************************** + MatrixView.hpp - description + ------------------- + begin : Dec 28, 2019 + copyright : (C) 2019 by Tomas Oberhuber + email : tomas.oberhuber@fjfi.cvut.cz + ***************************************************************************/ + +/* See Copyright Notice in tnl/Copyright */ + +#pragma once + +#include <TNL/Matrices/Matrix.h> +#include <TNL/Assert.h> +#include <TNL/Cuda/LaunchHelpers.h> +#include <TNL/Cuda/MemoryHelpers.h> +#include <TNL/Cuda/SharedMemory.h> + +namespace TNL { +namespace Matrices { + +template< typename Real, + typename Device, + typename Index > +__cuda_callable__ +MatrixView< Real, Device, Index >:: +MatrixView() +: rows( 0 ), + columns( 0 ) +{ +} + +template< typename Real, + typename Device, + typename Index > +__cuda_callable__ +MatrixView< Real, Device, Index >:: +MatrixView( const IndexType rows_, + const IndexType columns_, + const ValuesView& values_ ) + : rows( rows_ ), columns( columns_ ), values( values_ ) +{ +} + +template< typename Real, + typename Device, + typename Index > +Index +MatrixView< Real, Device, Index >:: +getAllocatedElementsCount() const +{ + return this->values.getSize(); +} + +template< typename Real, + typename Device, + typename Index > +Index +MatrixView< Real, Device, Index >:: +getNumberOfNonzeroMatrixElements() const +{ + const auto values_view = this->values.getConstView(); + auto fetch = [=] __cuda_callable__ ( const IndexType i ) -> IndexType { + return ( values_view[ i ] != 0.0 ); + }; + return Algorithms::Reduction< DeviceType >::reduce( this->values.getSize(), std::plus<>{}, fetch, 0 ); +} + +template< typename Real, + typename Device, + typename Index > +__cuda_callable__ +Index MatrixView< Real, Device, Index >::getRows() const +{ + return this->rows; +} + +template< typename Real, + typename Device, + typename Index > +__cuda_callable__ +Index MatrixView< Real, Device, Index >::getColumns() const +{ + return this->columns; +} + +template< typename Real, + typename Device, + typename Index > +__cuda_callable__ +const typename MatrixView< 
Real, Device, Index >::ValuesView& +MatrixView< Real, Device, Index >:: +getValues() const +{ + return this->values; +} + +template< typename Real, + typename Device, + typename Index > +__cuda_callable__ +typename MatrixView< Real, Device, Index >::ValuesView& +MatrixView< Real, Device, Index >:: +getValues() +{ + return this->values; +} +template< typename Real, + typename Device, + typename Index > +__cuda_callable__ +MatrixView< Real, Device, Index >& +MatrixView< Real, Device, Index >:: +operator=( const MatrixView& view ) +{ + rows = view.rows; + columns = view.columns; + values.bind( view.values ); + return *this; +} + +template< typename Real, + typename Device, + typename Index > + template< typename MatrixT > +bool MatrixView< Real, Device, Index >::operator == ( const MatrixT& matrix ) const +{ + if( this->getRows() != matrix.getRows() || + this->getColumns() != matrix.getColumns() ) + return false; + for( IndexType row = 0; row < this->getRows(); row++ ) + for( IndexType column = 0; column < this->getColumns(); column++ ) + if( this->getElement( row, column ) != matrix.getElement( row, column ) ) + return false; + return true; +} + +template< typename Real, + typename Device, + typename Index > + template< typename MatrixT > +bool MatrixView< Real, Device, Index >::operator != ( const MatrixT& matrix ) const +{ + return ! operator == ( matrix ); +} + +template< typename Real, + typename Device, + typename Index > +void MatrixView< Real, Device, Index >::save( File& file ) const +{ + Object::save( file ); + file.save( &this->rows ); + file.save( &this->columns ); + file << this->values; +} + +template< typename Real, + typename Device, + typename Index > +void MatrixView< Real, Device, Index >::load( File& file ) +{ + Object::load( file ); + file.load( &this->rows ); + file.load( &this->columns ); + file >> this->values; +} + +template< typename Real, + typename Device, + typename Index > +void MatrixView< Real, Device, Index >::print( std::ostream& str ) const +{ +} + +template< typename Real, + typename Device, + typename Index > +__cuda_callable__ +const Index& +MatrixView< Real, Device, Index >:: +getNumberOfColors() const +{ + return this->numberOfColors; +} + +template< typename Real, + typename Device, + typename Index > +void +MatrixView< Real, Device, Index >:: +computeColorsVector(Containers::Vector<Index, Device, Index> &colorsVector) +{ + for( IndexType i = this->getRows() - 1; i >= 0; i-- ) + { + // init color array + Containers::Vector< Index, Device, Index > usedColors; + usedColors.setSize( this->numberOfColors ); + for( IndexType j = 0; j < this->numberOfColors; j++ ) + usedColors.setElement( j, 0 ); + + // find all colors used in given row + for( IndexType j = i + 1; j < this->getColumns(); j++ ) + if( this->getElement( i, j ) != 0.0 ) + usedColors.setElement( colorsVector.getElement( j ), 1 ); + + // find unused color + bool found = false; + for( IndexType j = 0; j < this->numberOfColors; j++ ) + if( usedColors.getElement( j ) == 0 ) + { + colorsVector.setElement( i, j ); + found = true; + break; + } + if( !found ) + { + colorsVector.setElement( i, this->numberOfColors ); + this->numberOfColors++; + } + } +} + +} // namespace Matrices +} // namespace TNL diff --git a/src/TNL/Matrices/Matrix_impl.h b/src/TNL/Matrices/Matrix_impl.h deleted file mode 100644 index 3371ee4ec453d0c2d6af294ed6ab2df9d3623b32..0000000000000000000000000000000000000000 --- a/src/TNL/Matrices/Matrix_impl.h +++ /dev/null @@ -1,299 +0,0 @@ 
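// Illustrative sketch, not part of the change set: with the bool return values removed
// from MatrixReader, callers are expected to catch std::runtime_error instead of testing
// return codes. The readMtxFile entry point used below is assumed from context (it is not
// shown in this hunk), and the readMatrixFromMtx helper exists only for this example.
#include <fstream>
#include <iostream>
#include <stdexcept>
#include <TNL/Matrices/MatrixReader.h>

template< typename Matrix >
bool readMatrixFromMtx( const char* fileName, Matrix& matrix )
{
   std::ifstream file( fileName );
   try {
      // Throws std::runtime_error carrying the message that was previously written to std::cerr.
      TNL::Matrices::MatrixReader< Matrix >::readMtxFile( file, matrix );
      return true;
   }
   catch( const std::runtime_error& e ) {
      std::cerr << "Failed to read " << fileName << ": " << e.what() << std::endl;
      return false;
   }
}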
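// Illustrative sketch, not part of the change set: how the MatrixType tag structs and the
// new MatrixView are meant to be consumed. MatrixView is primarily a base for the concrete
// format views, but binding one directly over a value vector shows its non-owning semantics.
// The helper names (storageDescription, matrixViewExample) are made up for this example.
#include <iostream>
#include <TNL/Containers/Vector.h>
#include <TNL/Matrices/MatrixType.h>
#include <TNL/Matrices/MatrixView.h>

// Generic code can branch at compile time on the matrix-type tag it is given.
template< typename MatrixTypeTag >
constexpr const char* storageDescription()
{
   return MatrixTypeTag::isSymmetric() ? "symmetric: only one triangle is stored"
                                       : "general: all elements are stored";
}

void matrixViewExample()
{
   using HostVector = TNL::Containers::Vector< double, TNL::Devices::Host, int >;
   HostVector values( 6 );    // storage for a dense 2x3 matrix, owned by the caller
   values.setValue( 0.0 );

   // The view only binds the dimensions and a VectorView of the values; no data is copied.
   TNL::Matrices::MatrixView< double, TNL::Devices::Host, int > view( 2, 3, values.getView() );

   std::cout << storageDescription< TNL::Matrices::SymmetricMatrix >() << std::endl;
   std::cout << "allocated elements: " << view.getAllocatedElementsCount() << std::endl;
}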
-/*************************************************************************** - Matrix_impl.h - description - ------------------- - begin : Dec 18, 2013 - copyright : (C) 2013 by Tomas Oberhuber - email : tomas.oberhuber@fjfi.cvut.cz - ***************************************************************************/ - -/* See Copyright Notice in tnl/Copyright */ - -#pragma once - -#include <TNL/Matrices/Matrix.h> -#include <TNL/Assert.h> -#include <TNL/Cuda/LaunchHelpers.h> -#include <TNL/Cuda/MemoryHelpers.h> -#include <TNL/Cuda/SharedMemory.h> - -namespace TNL { -namespace Matrices { - -template< typename Real, - typename Device, - typename Index > -Matrix< Real, Device, Index >::Matrix() -: rows( 0 ), - columns( 0 ) -{ -} - -template< typename Real, - typename Device, - typename Index > -void Matrix< Real, Device, Index >::setDimensions( const IndexType rows, - const IndexType columns ) -{ - TNL_ASSERT( rows > 0 && columns > 0, - std::cerr << " rows = " << rows << " columns = " << columns ); - this->rows = rows; - this->columns = columns; -} - -template< typename Real, - typename Device, - typename Index > -void Matrix< Real, Device, Index >::getCompressedRowLengths( CompressedRowLengthsVector& rowLengths ) const -{ - rowLengths.setSize( this->getRows() ); - getCompressedRowLengths( rowLengths.getView() ); -} - -template< typename Real, - typename Device, - typename Index > -void Matrix< Real, Device, Index >::getCompressedRowLengths( CompressedRowLengthsVectorView rowLengths ) const -{ - TNL_ASSERT_EQ( rowLengths.getSize(), this->getRows(), "invalid size of the rowLengths vector" ); - for( IndexType row = 0; row < this->getRows(); row++ ) - rowLengths.setElement( row, this->getRowLength( row ) ); -} - -template< typename Real, - typename Device, - typename Index > - template< typename Real2, - typename Device2, - typename Index2 > -void Matrix< Real, Device, Index >::setLike( const Matrix< Real2, Device2, Index2 >& matrix ) -{ - setDimensions( matrix.getRows(), matrix.getColumns() ); -} - -template< typename Real, - typename Device, - typename Index > -Index Matrix< Real, Device, Index >::getNumberOfNonzeroMatrixElements() const -{ - IndexType nonZeroElements( 0 ); - for( IndexType i = 0; this->values.getSize(); i++ ) - if( this->values.getElement( i ) != 0.0 ) - nonZeroElements++; - - return nonZeroElements; -} - -template< typename Real, - typename Device, - typename Index > -__cuda_callable__ -Index Matrix< Real, Device, Index >::getRows() const -{ - return this->rows; -} - -template< typename Real, - typename Device, - typename Index > -__cuda_callable__ -Index Matrix< Real, Device, Index >::getColumns() const -{ - return this->columns; -} - -template< typename Real, - typename Device, - typename Index > -const typename Matrix< Real, Device, Index >::ValuesVector& -Matrix< Real, Device, Index >:: -getValues() const -{ - return this->values; -} - -template< typename Real, - typename Device, - typename Index > -typename Matrix< Real, Device, Index >::ValuesVector& -Matrix< Real, Device, Index >:: -getValues() -{ - return this->values; -} - -template< typename Real, - typename Device, - typename Index > -void Matrix< Real, Device, Index >::reset() -{ - this->rows = 0; - this->columns = 0; -} - -template< typename Real, - typename Device, - typename Index > - template< typename MatrixT > -bool Matrix< Real, Device, Index >::operator == ( const MatrixT& matrix ) const -{ - if( this->getRows() != matrix.getRows() || - this->getColumns() != matrix.getColumns() ) - return false; - for( 
IndexType row = 0; row < this->getRows(); row++ ) - for( IndexType column = 0; column < this->getColumns(); column++ ) - if( this->getElement( row, column ) != matrix.getElement( row, column ) ) - return false; - return true; -} - -template< typename Real, - typename Device, - typename Index > - template< typename MatrixT > -bool Matrix< Real, Device, Index >::operator != ( const MatrixT& matrix ) const -{ - return ! operator == ( matrix ); -} - -template< typename Real, - typename Device, - typename Index > -void Matrix< Real, Device, Index >::save( File& file ) const -{ - Object::save( file ); - file.save( &this->rows ); - file.save( &this->columns ); - file << this->values; -} - -template< typename Real, - typename Device, - typename Index > -void Matrix< Real, Device, Index >::load( File& file ) -{ - Object::load( file ); - file.load( &this->rows ); - file.load( &this->columns ); - file >> this->values; -} - -template< typename Real, - typename Device, - typename Index > -void Matrix< Real, Device, Index >::print( std::ostream& str ) const -{ -} - -template< typename Real, - typename Device, - typename Index > -__cuda_callable__ -const Index& -Matrix< Real, Device, Index >:: -getNumberOfColors() const -{ - return this->numberOfColors; -} - -template< typename Real, - typename Device, - typename Index > -void -Matrix< Real, Device, Index >:: -computeColorsVector(Containers::Vector<Index, Device, Index> &colorsVector) -{ - for( IndexType i = this->getRows() - 1; i >= 0; i-- ) - { - // init color array - Containers::Vector< Index, Device, Index > usedColors; - usedColors.setSize( this->numberOfColors ); - for( IndexType j = 0; j < this->numberOfColors; j++ ) - usedColors.setElement( j, 0 ); - - // find all colors used in given row - for( IndexType j = i + 1; j < this->getColumns(); j++ ) - if( this->getElement( i, j ) != 0.0 ) - usedColors.setElement( colorsVector.getElement( j ), 1 ); - - // find unused color - bool found = false; - for( IndexType j = 0; j < this->numberOfColors; j++ ) - if( usedColors.getElement( j ) == 0 ) - { - colorsVector.setElement( i, j ); - found = true; - break; - } - if( !found ) - { - colorsVector.setElement( i, this->numberOfColors ); - this->numberOfColors++; - } - } -} - -template< typename Real, - typename Device, - typename Index > -void -Matrix< Real, Device, Index >:: -copyFromHostToCuda( Matrix< Real, Devices::Host, Index >& matrix ) -{ - this->numberOfColors = matrix.getNumberOfColors(); - this->columns = matrix.getColumns(); - this->rows = matrix.getRows(); - - this->values.setSize( matrix.getValuesSize() ); -} - -#ifdef HAVE_CUDA -template< typename Matrix, - typename InVector, - typename OutVector > -__global__ void MatrixVectorProductCudaKernel( const Matrix* matrix, - const InVector* inVector, - OutVector* outVector, - int gridIdx ) -{ - static_assert( std::is_same< typename Matrix::DeviceType, Devices::Cuda >::value, "" ); - const typename Matrix::IndexType rowIdx = ( gridIdx * Cuda::getMaxGridSize() + blockIdx.x ) * blockDim.x + threadIdx.x; - if( rowIdx < matrix->getRows() ) - ( *outVector )[ rowIdx ] = matrix->rowVectorProduct( rowIdx, *inVector ); -} -#endif - -template< typename Matrix, - typename InVector, - typename OutVector > -void MatrixVectorProductCuda( const Matrix& matrix, - const InVector& inVector, - OutVector& outVector ) -{ -#ifdef HAVE_CUDA - typedef typename Matrix::IndexType IndexType; - Matrix* kernel_this = Cuda::passToDevice( matrix ); - InVector* kernel_inVector = Cuda::passToDevice( inVector ); - OutVector* 
kernel_outVector = Cuda::passToDevice( outVector ); - dim3 cudaBlockSize( 256 ), cudaGridSize( Cuda::getMaxGridSize() ); - const IndexType cudaBlocks = roundUpDivision( matrix.getRows(), cudaBlockSize.x ); - const IndexType cudaGrids = roundUpDivision( cudaBlocks, Cuda::getMaxGridSize() ); - for( IndexType gridIdx = 0; gridIdx < cudaGrids; gridIdx++ ) - { - if( gridIdx == cudaGrids - 1 ) - cudaGridSize.x = cudaBlocks % Cuda::getMaxGridSize(); - MatrixVectorProductCudaKernel<<< cudaGridSize, cudaBlockSize >>> - ( kernel_this, - kernel_inVector, - kernel_outVector, - gridIdx ); - TNL_CHECK_CUDA_DEVICE; - } - Cuda::freeFromDevice( kernel_this ); - Cuda::freeFromDevice( kernel_inVector ); - Cuda::freeFromDevice( kernel_outVector ); - TNL_CHECK_CUDA_DEVICE; -#endif -} - -} // namespace Matrices -} // namespace TNL diff --git a/src/TNL/Matrices/Multidiagonal.h b/src/TNL/Matrices/Multidiagonal.h index 1ee6a25e9af4fbf8d8f28461e6658305e2b0151f..3b92d1db1c4343bb41832f6158377b1ac6356c9b 100644 --- a/src/TNL/Matrices/Multidiagonal.h +++ b/src/TNL/Matrices/Multidiagonal.h @@ -12,213 +12,216 @@ #include <TNL/Matrices/Matrix.h> #include <TNL/Containers/Vector.h> -#include <TNL/Matrices/MultidiagonalRow.h> +#include <TNL/Matrices/MultidiagonalMatrixRowView.h> +#include <TNL/Containers/Segments/Ellpack.h> +#include <TNL/Matrices/details/MultidiagonalMatrixIndexer.h> +#include <TNL/Matrices/MultidiagonalMatrixView.h> namespace TNL { -namespace Matrices { +namespace Matrices { + +template< typename Real = double, + typename Device = Devices::Host, + typename Index = int, + bool RowMajorOrder = std::is_same< Device, Devices::Host >::value, + typename RealAllocator = typename Allocators::Default< Device >::template Allocator< Real >, + typename IndexAllocator = typename Allocators::Default< Device >::template Allocator< Index > > +class Multidiagonal : public Matrix< Real, Device, Index, RealAllocator > +{ + public: + using RealType = Real; + using DeviceType = Device; + using IndexType = Index; + using RealAllocatorType = RealAllocator; + using IndexAllocatorType = IndexAllocator; + using BaseType = Matrix< Real, Device, Index, RealAllocator >; + using ValuesVectorType = typename BaseType::ValuesVectorType; + using ValuesViewType = typename ValuesVectorType::ViewType; + using IndexerType = details::MultidiagonalMatrixIndexer< IndexType, RowMajorOrder >; + using DiagonalsShiftsType = Containers::Vector< IndexType, DeviceType, IndexType, IndexAllocatorType >; + using DiagonalsShiftsView = typename DiagonalsShiftsType::ViewType; + using RowView = MultidiagonalMatrixRowView< ValuesViewType, IndexerType, DiagonalsShiftsView >; + using ViewType = MultidiagonalMatrixView< Real, Device, Index, RowMajorOrder >; + using ConstViewType = MultidiagonalMatrixView< typename std::add_const< Real >::type, Device, Index, RowMajorOrder >; -template< typename Device > -class MultidiagonalDeviceDependentCode; + using HostDiagonalsShiftsType = Containers::Vector< IndexType, Devices::Host, IndexType >; + using HostDiagonalsShiftsView = typename HostDiagonalsShiftsType::ViewType; -template< typename Real, typename Device = Devices::Host, typename Index = int > -class Multidiagonal : public Matrix< Real, Device, Index > -{ -private: - // convenient template alias for controlling the selection of copy-assignment operator - template< typename Device2 > - using Enabler = std::enable_if< ! 
std::is_same< Device2, Device >::value >; - // friend class will be needed for templated assignment operators - template< typename Real2, typename Device2, typename Index2 > - friend class Multidiagonal; + // TODO: remove this - it is here only for compatibility with original matrix implementation + typedef Containers::Vector< IndexType, DeviceType, IndexType > CompressedRowLengthsVector; + typedef Containers::VectorView< IndexType, DeviceType, IndexType > CompressedRowLengthsVectorView; + typedef typename CompressedRowLengthsVectorView::ConstViewType ConstCompressedRowLengthsVectorView; + + template< typename _Real = Real, + typename _Device = Device, + typename _Index = Index > + using Self = Multidiagonal< _Real, _Device, _Index >; -public: - typedef Real RealType; - typedef Device DeviceType; - typedef Index IndexType; - typedef typename Matrix< Real, Device, Index >::CompressedRowLengthsVector CompressedRowLengthsVector; - typedef typename Matrix< Real, Device, Index >::ConstCompressedRowLengthsVectorView ConstCompressedRowLengthsVectorView; - typedef Matrix< Real, Device, Index > BaseType; - typedef MultidiagonalRow< Real, Index > MatrixRow; + static constexpr bool getRowMajorOrder() { return RowMajorOrder; }; - template< typename _Real = Real, - typename _Device = Device, - typename _Index = Index > - using Self = Multidiagonal< _Real, _Device, _Index >; + Multidiagonal(); - Multidiagonal(); + Multidiagonal( const IndexType rows, + const IndexType columns ); - static String getSerializationType(); + template< typename Vector > + Multidiagonal( const IndexType rows, + const IndexType columns, + const Vector& diagonalsShifts ); - virtual String getSerializationTypeVirtual() const; + ViewType getView() const; // TODO: remove const - void setDimensions( const IndexType rows, - const IndexType columns ); + //ConstViewType getConstView() const; - void setCompressedRowLengths( ConstCompressedRowLengthsVectorView rowLengths ); + static String getSerializationType(); - IndexType getRowLength( const IndexType row ) const; + virtual String getSerializationTypeVirtual() const; - __cuda_callable__ - IndexType getRowLengthFast( const IndexType row ) const; + template< typename Vector > + void setDimensions( const IndexType rows, + const IndexType columns, + const Vector& diagonalsShifts ); - IndexType getMaxRowLength() const; + //template< typename Vector > + void setCompressedRowLengths( const ConstCompressedRowLengthsVectorView rowCapacities ); - template< typename Vector > - void setDiagonals( const Vector& diagonals ); + const IndexType& getDiagonalsCount() const; - const Containers::Vector< Index, Device, Index >& getDiagonals() const; + const DiagonalsShiftsType& getDiagonalsShifts() const; - template< typename Real2, typename Device2, typename Index2 > - void setLike( const Multidiagonal< Real2, Device2, Index2 >& matrix ); + template< typename Vector > + void getCompressedRowLengths( Vector& rowLengths ) const; - IndexType getNumberOfMatrixElements() const; + IndexType getNonemptyRowsCount() const; - IndexType getNumberOfNonzeroMatrixElements() const; + [[deprecated]] + IndexType getRowLength( const IndexType row ) const; - IndexType getMaxRowlength() const; + IndexType getMaxRowLength() const; - void reset(); + template< typename Real_, typename Device_, typename Index_, bool RowMajorOrder_, typename RealAllocator_ > + void setLike( const Multidiagonal< Real_, Device_, Index_, RowMajorOrder_, RealAllocator_ >& m ); - template< typename Real2, typename Device2, typename Index2 > - bool 
operator == ( const Multidiagonal< Real2, Device2, Index2 >& matrix ) const; + IndexType getNumberOfNonzeroMatrixElements() const; - template< typename Real2, typename Device2, typename Index2 > - bool operator != ( const Multidiagonal< Real2, Device2, Index2 >& matrix ) const; + void reset(); - void setValue( const RealType& v ); + template< typename Real_, typename Device_, typename Index_, bool RowMajorOrder_, typename RealAllocator_ > + bool operator == ( const Multidiagonal< Real_, Device_, Index_, RowMajorOrder_, RealAllocator_ >& matrix ) const; - __cuda_callable__ - bool setElementFast( const IndexType row, - const IndexType column, - const RealType& value ); + template< typename Real_, typename Device_, typename Index_, bool RowMajorOrder_, typename RealAllocator_ > + bool operator != ( const Multidiagonal< Real_, Device_, Index_, RowMajorOrder_, RealAllocator_ >& matrix ) const; - bool setElement( const IndexType row, - const IndexType column, - const RealType& value ); + __cuda_callable__ + RowView getRow( const IndexType& rowIdx ); - __cuda_callable__ - bool addElementFast( const IndexType row, - const IndexType column, - const RealType& value, - const RealType& thisElementMultiplicator = 1.0 ); + __cuda_callable__ + const RowView getRow( const IndexType& rowIdx ) const; - bool addElement( const IndexType row, - const IndexType column, - const RealType& value, - const RealType& thisElementMultiplicator = 1.0 ); + void setValue( const RealType& v ); + void setElement( const IndexType row, + const IndexType column, + const RealType& value ); - __cuda_callable__ - bool setRowFast( const IndexType row, - const IndexType* columns, - const RealType* values, - const IndexType numberOfElements ); + void addElement( const IndexType row, + const IndexType column, + const RealType& value, + const RealType& thisElementMultiplicator = 1.0 ); - bool setRow( const IndexType row, - const IndexType* columns, - const RealType* values, - const IndexType numberOfElements ); + RealType getElement( const IndexType row, + const IndexType column ) const; + template< typename Fetch, typename Reduce, typename Keep, typename FetchReal > + void rowsReduction( IndexType first, IndexType last, Fetch& fetch, Reduce& reduce, Keep& keep, const FetchReal& zero ) const; - __cuda_callable__ - bool addRowFast( const IndexType row, - const IndexType* columns, - const RealType* values, - const IndexType numberOfElements, - const RealType& thisElementMultiplicator = 1.0 ); + template< typename Fetch, typename Reduce, typename Keep, typename FetchReal > + void allRowsReduction( Fetch& fetch, Reduce& reduce, Keep& keep, const FetchReal& zero ) const; - bool addRow( const IndexType row, - const IndexType* columns, - const RealType* values, - const IndexType numberOfElements, - const RealType& thisElementMultiplicator = 1.0 ); + template< typename Function > + void forRows( IndexType first, IndexType last, Function& function ) const; - __cuda_callable__ - RealType getElementFast( const IndexType row, - const IndexType column ) const; + template< typename Function > + void forRows( IndexType first, IndexType last, Function& function ); - RealType getElement( const IndexType row, - const IndexType column ) const; + template< typename Function > + void forAllRows( Function& function ) const; - __cuda_callable__ - void getRowFast( const IndexType row, - IndexType* columns, - RealType* values ) const; + template< typename Function > + void forAllRows( Function& function ); - /*void getRow( const IndexType row, - IndexType* 
columns, - RealType* values ) const;*/ + template< typename Vector > + __cuda_callable__ + typename Vector::RealType rowVectorProduct( const IndexType row, + const Vector& vector ) const; - __cuda_callable__ - MatrixRow getRow( const IndexType rowIndex ); + template< typename InVector, + typename OutVector > + void vectorProduct( const InVector& inVector, + OutVector& outVector ) const; - __cuda_callable__ - const MatrixRow getRow( const IndexType rowIndex ) const; + template< typename Real_, typename Device_, typename Index_, bool RowMajorOrder_, typename RealAllocator_ > + void addMatrix( const Multidiagonal< Real_, Device_, Index_, RowMajorOrder_, RealAllocator_ >& matrix, + const RealType& matrixMultiplicator = 1.0, + const RealType& thisMatrixMultiplicator = 1.0 ); - template< typename Vector > - __cuda_callable__ - typename Vector::RealType rowVectorProduct( const IndexType row, - const Vector& vector ) const; + template< typename Real2, typename Index2 > + void getTransposition( const Multidiagonal< Real2, Device, Index2 >& matrix, + const RealType& matrixMultiplicator = 1.0 ); - template< typename InVector, - typename OutVector > - void vectorProduct( const InVector& inVector, - OutVector& outVector ) const; + template< typename Vector1, typename Vector2 > + __cuda_callable__ + void performSORIteration( const Vector1& b, + const IndexType row, + Vector2& x, + const RealType& omega = 1.0 ) const; - template< typename Real2, typename Index2 > - void addMatrix( const Multidiagonal< Real2, Device, Index2 >& matrix, - const RealType& matrixMultiplicator = 1.0, - const RealType& thisMatrixMultiplicator = 1.0 ); + // copy assignment + Multidiagonal& operator=( const Multidiagonal& matrix ); - template< typename Real2, typename Index2 > - void getTransposition( const Multidiagonal< Real2, Device, Index2 >& matrix, - const RealType& matrixMultiplicator = 1.0 ); + // cross-device copy assignment + template< typename Real_, + typename Device_, + typename Index_, + bool RowMajorOrder_, + typename RealAllocator_, + typename IndexAllocator_ > + Multidiagonal& operator=( const Multidiagonal< Real_, Device_, Index_, RowMajorOrder_, RealAllocator_, IndexAllocator_ >& matrix ); - template< typename Vector1, typename Vector2 > - bool performSORIteration( const Vector1& b, - const IndexType row, - Vector2& x, - const RealType& omega = 1.0 ) const; + void save( File& file ) const; - // copy assignment - Multidiagonal& operator=( const Multidiagonal& matrix ); + void load( File& file ); - // cross-device copy assignment - template< typename Real2, typename Device2, typename Index2, - typename = typename Enabler< Device2 >::type > - Multidiagonal& operator=( const Multidiagonal< Real2, Device2, Index2 >& matrix ); + void save( const String& fileName ) const; - void save( File& file ) const; + void load( const String& fileName ); - void load( File& file ); + void print( std::ostream& str ) const; - void save( const String& fileName ) const; + const IndexerType& getIndexer() const; - void load( const String& fileName ); + IndexerType& getIndexer(); - void print( std::ostream& str ) const; + __cuda_callable__ + IndexType getPaddingIndex() const; -protected: + protected: - bool getElementIndex( const IndexType row, - const IndexType column, - IndexType& index ) const; + __cuda_callable__ + IndexType getElementIndex( const IndexType row, + const IndexType localIdx ) const; - __cuda_callable__ - bool getElementIndexFast( const IndexType row, - const IndexType column, - IndexType& index ) const; + 
DiagonalsShiftsType diagonalsShifts; - Containers::Vector< Real, Device, Index > values; + HostDiagonalsShiftsType hostDiagonalsShifts; - Containers::Vector< Index, Device, Index > diagonalsShift; + IndexerType indexer; - typedef MultidiagonalDeviceDependentCode< DeviceType > DeviceDependentCode; - friend class MultidiagonalDeviceDependentCode< DeviceType >; + ViewType view; }; } // namespace Matrices } // namespace TNL -#include <TNL/Matrices/Multidiagonal_impl.h> +#include <TNL/Matrices/Multidiagonal.hpp> diff --git a/src/TNL/Matrices/Multidiagonal.hpp b/src/TNL/Matrices/Multidiagonal.hpp new file mode 100644 index 0000000000000000000000000000000000000000..e8eb6675134f20ecd2f74f3a984d59c94bcd8bbf --- /dev/null +++ b/src/TNL/Matrices/Multidiagonal.hpp @@ -0,0 +1,950 @@ +/*************************************************************************** + Multidiagonal.hpp - description + ------------------- + begin : Oct 13, 2011 + copyright : (C) 2011 by Tomas Oberhuber + email : tomas.oberhuber@fjfi.cvut.cz + ***************************************************************************/ + +/* See Copyright Notice in tnl/Copyright */ + +#pragma once + +#include <sstream> +#include <TNL/Assert.h> +#include <TNL/Matrices/Multidiagonal.h> +#include <TNL/Exceptions/NotImplementedError.h> + +namespace TNL { +namespace Matrices { + +template< typename Device > +class MultidiagonalDeviceDependentCode; + +template< typename Real, + typename Device, + typename Index, + bool RowMajorOrder, + typename RealAllocator, + typename IndexAllocator > +Multidiagonal< Real, Device, Index, RowMajorOrder, RealAllocator, IndexAllocator >:: +Multidiagonal() +{ +} + +template< typename Real, + typename Device, + typename Index, + bool RowMajorOrder, + typename RealAllocator, + typename IndexAllocator > + template< typename Vector > +Multidiagonal< Real, Device, Index, RowMajorOrder, RealAllocator, IndexAllocator >:: +Multidiagonal( const IndexType rows, + const IndexType columns, + const Vector& diagonalsShifts ) +{ + TNL_ASSERT_GT( diagonalsShifts.getSize(), 0, "Cannot construct mutltidiagonal matrix with no diagonals shifts." ); + this->setDimensions( rows, columns, diagonalsShifts ); +} + +template< typename Real, + typename Device, + typename Index, + bool RowMajorOrder, + typename RealAllocator, + typename IndexAllocator > +auto +Multidiagonal< Real, Device, Index, RowMajorOrder, RealAllocator, IndexAllocator >:: +getView() const -> ViewType +{ + // TODO: fix when getConstView works + return ViewType( const_cast< Multidiagonal* >( this )->values.getView(), + const_cast< Multidiagonal* >( this )->diagonalsShifts.getView(), + const_cast< Multidiagonal* >( this )->hostDiagonalsShifts.getView(), + indexer ); +} + +/*template< typename Real, + typename Device, + typename Index, + bool RowMajorOrder, + typename RealAllocator, + typename IndexAllocator > +auto +Multidiagonal< Real, Device, Index, RowMajorOrder, RealAllocator, IndexAllocator >:: +getConstView() const -> ConstViewType +{ + return ConstViewType( this->values.getConstView(), indexer ); +}*/ + +template< typename Real, + typename Device, + typename Index, + bool RowMajorOrder, + typename RealAllocator, + typename IndexAllocator > +String +Multidiagonal< Real, Device, Index, RowMajorOrder, RealAllocator, IndexAllocator >:: +getSerializationType() +{ + return String( "Matrices::Multidiagonal< " ) + + TNL::getSerializationType< RealType >() + ", [any_device], " + + TNL::getSerializationType< IndexType >() + ", " + + ( RowMajorOrder ? 
"true" : "false" ) + ", [any_allocator], [any_allocator] >"; +} + +template< typename Real, + typename Device, + typename Index, + bool RowMajorOrder, + typename RealAllocator, + typename IndexAllocator > +String +Multidiagonal< Real, Device, Index, RowMajorOrder, RealAllocator, IndexAllocator >:: +getSerializationTypeVirtual() const +{ + return this->getSerializationType(); +} + +template< typename Real, + typename Device, + typename Index, + bool RowMajorOrder, + typename RealAllocator, + typename IndexAllocator > + template< typename Vector > +void +Multidiagonal< Real, Device, Index, RowMajorOrder, RealAllocator, IndexAllocator >:: +setDimensions( const IndexType rows, + const IndexType columns, + const Vector& diagonalsShifts ) +{ + Matrix< Real, Device, Index >::setDimensions( rows, columns ); + this->diagonalsShifts = diagonalsShifts; + this->hostDiagonalsShifts = diagonalsShifts; + const IndexType minShift = min( diagonalsShifts ); + IndexType nonemptyRows = min( rows, columns ); + if( rows > columns && minShift < 0 ) + nonemptyRows = min( rows, nonemptyRows - minShift ); + this->indexer.set( rows, columns, diagonalsShifts.getSize(), nonemptyRows ); + this->values.setSize( this->indexer.getStorageSize() ); + this->values = 0.0; + this->view = this->getView(); +} + +template< typename Real, + typename Device, + typename Index, + bool RowMajorOrder, + typename RealAllocator, + typename IndexAllocator > + // template< typename Vector > +void +Multidiagonal< Real, Device, Index, RowMajorOrder, RealAllocator, IndexAllocator >:: +setCompressedRowLengths( const ConstCompressedRowLengthsVectorView rowLengths ) +{ + if( max( rowLengths ) > 3 ) + throw std::logic_error( "Too many non-zero elements per row in a tri-diagonal matrix." ); + if( rowLengths.getElement( 0 ) > 2 ) + throw std::logic_error( "Too many non-zero elements per row in a tri-diagonal matrix." ); + const IndexType diagonalLength = min( this->getRows(), this->getColumns() ); + if( this->getRows() > this->getColumns() ) + if( rowLengths.getElement( this->getRows()-1 ) > 1 ) + throw std::logic_error( "Too many non-zero elements per row in a tri-diagonal matrix." ); + if( this->getRows() == this->getColumns() ) + if( rowLengths.getElement( this->getRows()-1 ) > 2 ) + throw std::logic_error( "Too many non-zero elements per row in a tri-diagonal matrix." ); + if( this->getRows() < this->getColumns() ) + if( rowLengths.getElement( this->getRows()-1 ) > 3 ) + throw std::logic_error( "Too many non-zero elements per row in a tri-diagonal matrix." 
); +} + +template< typename Real, + typename Device, + typename Index, + bool RowMajorOrder, + typename RealAllocator, + typename IndexAllocator > +const Index& +Multidiagonal< Real, Device, Index, RowMajorOrder, RealAllocator, IndexAllocator >:: +getDiagonalsCount() const +{ + return this->view.getDiagonalsCount(); +} + +template< typename Real, + typename Device, + typename Index, + bool RowMajorOrder, + typename RealAllocator, + typename IndexAllocator > +auto +Multidiagonal< Real, Device, Index, RowMajorOrder, RealAllocator, IndexAllocator >:: +getDiagonalsShifts() const -> const DiagonalsShiftsType& +{ + return this->diagonalsShifts; +} + +template< typename Real, + typename Device, + typename Index, + bool RowMajorOrder, + typename RealAllocator, + typename IndexAllocator > + template< typename Vector > +void +Multidiagonal< Real, Device, Index, RowMajorOrder, RealAllocator, IndexAllocator >:: +getCompressedRowLengths( Vector& rowLengths ) const +{ + return this->view.getCompressedRowLengths( rowLengths ); +} + +template< typename Real, + typename Device, + typename Index, + bool RowMajorOrder, + typename RealAllocator, + typename IndexAllocator > +Index +Multidiagonal< Real, Device, Index, RowMajorOrder, RealAllocator, IndexAllocator >:: +getNonemptyRowsCount() const +{ + return this->indexer.getNonemptyRowsCount(); +} + +template< typename Real, + typename Device, + typename Index, + bool RowMajorOrder, + typename RealAllocator, + typename IndexAllocator > +Index +Multidiagonal< Real, Device, Index, RowMajorOrder, RealAllocator, IndexAllocator >:: +getRowLength( const IndexType row ) const +{ + return this->view.getRowLength( row ); +} + +template< typename Real, + typename Device, + typename Index, + bool RowMajorOrder, + typename RealAllocator, + typename IndexAllocator > +Index +Multidiagonal< Real, Device, Index, RowMajorOrder, RealAllocator, IndexAllocator >:: +getMaxRowLength() const +{ + return this->view.getMaxRowLength(); +} + +template< typename Real, + typename Device, + typename Index, + bool RowMajorOrder, + typename RealAllocator, + typename IndexAllocator > + template< typename Real_, typename Device_, typename Index_, bool RowMajorOrder_, typename RealAllocator_ > +void +Multidiagonal< Real, Device, Index, RowMajorOrder, RealAllocator, IndexAllocator >:: +setLike( const Multidiagonal< Real_, Device_, Index_, RowMajorOrder_, RealAllocator_ >& m ) +{ + this->setDimensions( m.getRows(), m.getColumns(), m.getDiagonalsShifts() ); +} + +template< typename Real, + typename Device, + typename Index, + bool RowMajorOrder, + typename RealAllocator, + typename IndexAllocator > +Index +Multidiagonal< Real, Device, Index, RowMajorOrder, RealAllocator, IndexAllocator >:: +getNumberOfNonzeroMatrixElements() const +{ + return this->view.getNumberOfNonzeroMatrixElements(); +} + +template< typename Real, + typename Device, + typename Index, + bool RowMajorOrder, + typename RealAllocator, + typename IndexAllocator > +void +Multidiagonal< Real, Device, Index, RowMajorOrder, RealAllocator, IndexAllocator >:: +reset() +{ + Matrix< Real, Device, Index >::reset(); +} + +template< typename Real, + typename Device, + typename Index, + bool RowMajorOrder, + typename RealAllocator, + typename IndexAllocator > + template< typename Real_, typename Device_, typename Index_, bool RowMajorOrder_, typename RealAllocator_ > +bool +Multidiagonal< Real, Device, Index, RowMajorOrder, RealAllocator, IndexAllocator >:: +operator == ( const Multidiagonal< Real_, Device_, Index_, RowMajorOrder_, 
RealAllocator_ >& matrix ) const +{ + if( RowMajorOrder == RowMajorOrder_ ) + return this->values == matrix.values; + else + { + TNL_ASSERT( false, "TODO" ); + } +} + +template< typename Real, + typename Device, + typename Index, + bool RowMajorOrder, + typename RealAllocator, + typename IndexAllocator > + template< typename Real_, typename Device_, typename Index_, bool RowMajorOrder_, typename RealAllocator_ > +bool +Multidiagonal< Real, Device, Index, RowMajorOrder, RealAllocator, IndexAllocator >:: +operator != ( const Multidiagonal< Real_, Device_, Index_, RowMajorOrder_, RealAllocator_ >& matrix ) const +{ + return ! this->operator==( matrix ); +} + +template< typename Real, + typename Device, + typename Index, + bool RowMajorOrder, + typename RealAllocator, + typename IndexAllocator > +void +Multidiagonal< Real, Device, Index, RowMajorOrder, RealAllocator, IndexAllocator >:: +setValue( const RealType& v ) +{ + this->view.setValue( v ); +} + +template< typename Real, + typename Device, + typename Index, + bool RowMajorOrder, + typename RealAllocator, + typename IndexAllocator > +__cuda_callable__ +auto +Multidiagonal< Real, Device, Index, RowMajorOrder, RealAllocator, IndexAllocator >:: +getRow( const IndexType& rowIdx ) const -> const RowView +{ + return this->view.getRow( rowIdx ); +} + +template< typename Real, + typename Device, + typename Index, + bool RowMajorOrder, + typename RealAllocator, + typename IndexAllocator > +__cuda_callable__ +auto +Multidiagonal< Real, Device, Index, RowMajorOrder, RealAllocator, IndexAllocator >:: +getRow( const IndexType& rowIdx ) -> RowView +{ + return this->view.getRow( rowIdx ); +} + +template< typename Real, + typename Device, + typename Index, + bool RowMajorOrder, + typename RealAllocator, + typename IndexAllocator > +void +Multidiagonal< Real, Device, Index, RowMajorOrder, RealAllocator, IndexAllocator >:: +setElement( const IndexType row, const IndexType column, const RealType& value ) +{ + this->view.setElement( row, column, value ); +} + +template< typename Real, + typename Device, + typename Index, + bool RowMajorOrder, + typename RealAllocator, + typename IndexAllocator > +void +Multidiagonal< Real, Device, Index, RowMajorOrder, RealAllocator, IndexAllocator >:: +addElement( const IndexType row, + const IndexType column, + const RealType& value, + const RealType& thisElementMultiplicator ) +{ + this->view.addElement( row, column, value, thisElementMultiplicator ); +} + +template< typename Real, + typename Device, + typename Index, + bool RowMajorOrder, + typename RealAllocator, + typename IndexAllocator > +Real +Multidiagonal< Real, Device, Index, RowMajorOrder, RealAllocator, IndexAllocator >:: +getElement( const IndexType row, const IndexType column ) const +{ + return this->view.getElement( row, column ); +} + +template< typename Real, + typename Device, + typename Index, + bool RowMajorOrder, + typename RealAllocator, + typename IndexAllocator > + template< typename Fetch, typename Reduce, typename Keep, typename FetchReal > +void +Multidiagonal< Real, Device, Index, RowMajorOrder, RealAllocator, IndexAllocator >:: +rowsReduction( IndexType first, IndexType last, Fetch& fetch, Reduce& reduce, Keep& keep, const FetchReal& zero ) const +{ + this->view.rowsReduction( first, last, fetch, reduce, keep, zero ); +} + +template< typename Real, + typename Device, + typename Index, + bool RowMajorOrder, + typename RealAllocator, + typename IndexAllocator > + template< typename Fetch, typename Reduce, typename Keep, typename FetchReal > 
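// Illustrative sketch, not part of the change set: assembling a small 1D-Laplacian-like
// matrix through the Multidiagonal interface defined in this file. The function name
// multidiagonalExample is made up; the callback signature passed to forAllRows follows
// the lambdas used in the cross-device operator= later in this file and is otherwise an
// assumption of this sketch (padding entries outside the matrix may also be visited).
#include <iostream>
#include <TNL/Containers/Vector.h>
#include <TNL/Matrices/Multidiagonal.h>

void multidiagonalExample()
{
   using Matrix = TNL::Matrices::Multidiagonal< double, TNL::Devices::Host, int >;
   const int size = 5;

   // sub-diagonal, main diagonal and super-diagonal
   TNL::Containers::Vector< int, TNL::Devices::Host, int > shifts( 3 );
   shifts.setElement( 0, -1 );
   shifts.setElement( 1, 0 );
   shifts.setElement( 2, 1 );

   Matrix matrix( size, size, shifts );
   for( int i = 0; i < size; i++ ) {
      if( i > 0 )
         matrix.setElement( i, i - 1, -1.0 );
      matrix.setElement( i, i, 2.0 );
      if( i < size - 1 )
         matrix.setElement( i, i + 1, -1.0 );
   }
   std::cout << "non-zero elements: " << matrix.getNumberOfNonzeroMatrixElements() << std::endl;

   // Visit all stored elements; value is writable in the non-const overload.
   auto dump = [] ( const int& rowIdx, const int& localIdx, const int& columnIdx,
                    double& value, bool& compute ) {
      std::cout << rowIdx << " " << columnIdx << " " << value << std::endl;
   };
   matrix.forAllRows( dump );
}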
+void
+Multidiagonal< Real, Device, Index, RowMajorOrder, RealAllocator, IndexAllocator >::
+allRowsReduction( Fetch& fetch, Reduce& reduce, Keep& keep, const FetchReal& zero ) const
+{
+ this->view.rowsReduction( 0, this->getRows(), fetch, reduce, keep, zero );
+}
+
+template< typename Real,
+ typename Device,
+ typename Index,
+ bool RowMajorOrder,
+ typename RealAllocator,
+ typename IndexAllocator >
+ template< typename Function >
+void
+Multidiagonal< Real, Device, Index, RowMajorOrder, RealAllocator, IndexAllocator >::
+forRows( IndexType first, IndexType last, Function& function ) const
+{
+ this->view.forRows( first, last, function );
+}
+
+template< typename Real,
+ typename Device,
+ typename Index,
+ bool RowMajorOrder,
+ typename RealAllocator,
+ typename IndexAllocator >
+ template< typename Function >
+void
+Multidiagonal< Real, Device, Index, RowMajorOrder, RealAllocator, IndexAllocator >::
+forRows( IndexType first, IndexType last, Function& function )
+{
+ this->view.forRows( first, last, function );
+}
+
+template< typename Real,
+ typename Device,
+ typename Index,
+ bool RowMajorOrder,
+ typename RealAllocator,
+ typename IndexAllocator >
+ template< typename Function >
+void
+Multidiagonal< Real, Device, Index, RowMajorOrder, RealAllocator, IndexAllocator >::
+forAllRows( Function& function ) const
+{
+ this->view.forRows( 0, this->getRows(), function );
+}
+
+template< typename Real,
+ typename Device,
+ typename Index,
+ bool RowMajorOrder,
+ typename RealAllocator,
+ typename IndexAllocator >
+ template< typename Function >
+void
+Multidiagonal< Real, Device, Index, RowMajorOrder, RealAllocator, IndexAllocator >::
+forAllRows( Function& function )
+{
+ this->view.forRows( 0, this->getRows(), function );
+}
+
+template< typename Real,
+ typename Device,
+ typename Index,
+ bool RowMajorOrder,
+ typename RealAllocator,
+ typename IndexAllocator >
+template< typename Vector >
+__cuda_callable__
+typename Vector::RealType
+Multidiagonal< Real, Device, Index, RowMajorOrder, RealAllocator, IndexAllocator >::
+rowVectorProduct( const IndexType row, const Vector& vector ) const
+{
+ return this->view.rowVectorProduct( row, vector );
+}
+
+template< typename Real,
+ typename Device,
+ typename Index,
+ bool RowMajorOrder,
+ typename RealAllocator,
+ typename IndexAllocator >
+ template< typename InVector,
+ typename OutVector >
+void
+Multidiagonal< Real, Device, Index, RowMajorOrder, RealAllocator, IndexAllocator >::
+vectorProduct( const InVector& inVector, OutVector& outVector ) const
+{
+ this->view.vectorProduct( inVector, outVector );
+}
+
+template< typename Real,
+ typename Device,
+ typename Index,
+ bool RowMajorOrder,
+ typename RealAllocator,
+ typename IndexAllocator >
+ template< typename Real_, typename Device_, typename Index_, bool RowMajorOrder_, typename RealAllocator_ >
+void
+Multidiagonal< Real, Device, Index, RowMajorOrder, RealAllocator, IndexAllocator >::
+addMatrix( const Multidiagonal< Real_, Device_, Index_, RowMajorOrder_, RealAllocator_ >& matrix,
+ const RealType& matrixMultiplicator,
+ const RealType& thisMatrixMultiplicator )
+{
+ this->view.addMatrix( matrix.getView(), matrixMultiplicator, thisMatrixMultiplicator );
+}
+
+#ifdef HAVE_CUDA
+template< typename Real,
+ typename Real2,
+ typename Index,
+ typename Index2 >
+__global__ void MultidiagonalTranspositionCudaKernel( const Multidiagonal< Real2, Devices::Cuda, Index2 >* inMatrix,
+ Multidiagonal< Real, Devices::Cuda, Index >* outMatrix,
+ const Real matrixMultiplicator,
+ const Index gridIdx )
+{
+
const Index rowIdx = ( gridIdx * Cuda::getMaxGridSize() + blockIdx.x ) * blockDim.x + threadIdx.x; + if( rowIdx < inMatrix->getRows() ) + { + if( rowIdx > 0 ) + outMatrix->setElementFast( rowIdx-1, + rowIdx, + matrixMultiplicator * inMatrix->getElementFast( rowIdx, rowIdx-1 ) ); + outMatrix->setElementFast( rowIdx, + rowIdx, + matrixMultiplicator * inMatrix->getElementFast( rowIdx, rowIdx ) ); + if( rowIdx < inMatrix->getRows()-1 ) + outMatrix->setElementFast( rowIdx+1, + rowIdx, + matrixMultiplicator * inMatrix->getElementFast( rowIdx, rowIdx+1 ) ); + } +} +#endif + +template< typename Real, + typename Device, + typename Index, + bool RowMajorOrder, + typename RealAllocator, + typename IndexAllocator > + template< typename Real2, typename Index2 > +void Multidiagonal< Real, Device, Index, RowMajorOrder, RealAllocator, IndexAllocator >::getTransposition( const Multidiagonal< Real2, Device, Index2 >& matrix, + const RealType& matrixMultiplicator ) +{ + TNL_ASSERT( this->getRows() == matrix.getRows(), + std::cerr << "This matrix rows: " << this->getRows() << std::endl + << "That matrix rows: " << matrix.getRows() << std::endl ); + if( std::is_same< Device, Devices::Host >::value ) + { + const IndexType& rows = matrix.getRows(); + for( IndexType i = 1; i < rows; i++ ) + { + RealType aux = matrix. getElement( i, i - 1 ); + this->setElement( i, i - 1, matrix.getElement( i - 1, i ) ); + this->setElement( i, i, matrix.getElement( i, i ) ); + this->setElement( i - 1, i, aux ); + } + } + if( std::is_same< Device, Devices::Cuda >::value ) + { +#ifdef HAVE_CUDA + Multidiagonal* kernel_this = Cuda::passToDevice( *this ); + typedef Multidiagonal< Real2, Device, Index2 > InMatrixType; + InMatrixType* kernel_inMatrix = Cuda::passToDevice( matrix ); + dim3 cudaBlockSize( 256 ), cudaGridSize( Cuda::getMaxGridSize() ); + const IndexType cudaBlocks = roundUpDivision( matrix.getRows(), cudaBlockSize.x ); + const IndexType cudaGrids = roundUpDivision( cudaBlocks, Cuda::getMaxGridSize() ); + for( IndexType gridIdx = 0; gridIdx < cudaGrids; gridIdx++ ) + { + if( gridIdx == cudaGrids - 1 ) + cudaGridSize.x = cudaBlocks % Cuda::getMaxGridSize(); + MultidiagonalTranspositionCudaKernel<<< cudaGridSize, cudaBlockSize >>> + ( kernel_inMatrix, + kernel_this, + matrixMultiplicator, + gridIdx ); + } + Cuda::freeFromDevice( kernel_this ); + Cuda::freeFromDevice( kernel_inMatrix ); + TNL_CHECK_CUDA_DEVICE; +#endif + } +} + +template< typename Real, + typename Device, + typename Index, + bool RowMajorOrder, + typename RealAllocator, + typename IndexAllocator > + template< typename Vector1, typename Vector2 > +__cuda_callable__ +void Multidiagonal< Real, Device, Index, RowMajorOrder, RealAllocator, IndexAllocator >::performSORIteration( const Vector1& b, + const IndexType row, + Vector2& x, + const RealType& omega ) const +{ + RealType sum( 0.0 ); + if( row > 0 ) + sum += this->getElementFast( row, row - 1 ) * x[ row - 1 ]; + if( row < this->getColumns() - 1 ) + sum += this->getElementFast( row, row + 1 ) * x[ row + 1 ]; + x[ row ] = ( 1.0 - omega ) * x[ row ] + omega / this->getElementFast( row, row ) * ( b[ row ] - sum ); +} + + +// copy assignment +template< typename Real, + typename Device, + typename Index, + bool RowMajorOrder, + typename RealAllocator, + typename IndexAllocator > +Multidiagonal< Real, Device, Index, RowMajorOrder, RealAllocator, IndexAllocator >& +Multidiagonal< Real, Device, Index, RowMajorOrder, RealAllocator, IndexAllocator >::operator=( const Multidiagonal& matrix ) +{ + this->setLike( matrix ); + 
this->values = matrix.values; + return *this; +} + +// cross-device copy assignment +template< typename Real, + typename Device, + typename Index, + bool RowMajorOrder, + typename RealAllocator, + typename IndexAllocator > + template< typename Real_, typename Device_, typename Index_, bool RowMajorOrder_, typename RealAllocator_, typename IndexAllocator_ > +Multidiagonal< Real, Device, Index, RowMajorOrder, RealAllocator, IndexAllocator >& +Multidiagonal< Real, Device, Index, RowMajorOrder, RealAllocator, IndexAllocator >:: +operator=( const Multidiagonal< Real_, Device_, Index_, RowMajorOrder_, RealAllocator_, IndexAllocator_ >& matrix ) +{ + using RHSMatrix = Multidiagonal< Real_, Device_, Index_, RowMajorOrder_, RealAllocator_, IndexAllocator_ >; + using RHSIndexType = typename RHSMatrix::IndexType; + using RHSRealType = typename RHSMatrix::RealType; + using RHSDeviceType = typename RHSMatrix::DeviceType; + using RHSRealAllocatorType = typename RHSMatrix::RealAllocatorType; + using RHSIndexAllocatorType = typename RHSMatrix::IndexAllocatorType; + + this->setLike( matrix ); + if( RowMajorOrder == RowMajorOrder_ ) + this->values = matrix.getValues(); + else + { + if( std::is_same< Device, Device_ >::value ) + { + const auto matrix_view = matrix.getView(); + auto f = [=] __cuda_callable__ ( const IndexType& rowIdx, const IndexType& localIdx, const IndexType& column, Real& value, bool& compute ) mutable { + value = matrix_view.getValues()[ matrix_view.getIndexer().getGlobalIndex( rowIdx, localIdx ) ]; + }; + this->forAllRows( f ); + } + else + { + const IndexType maxRowLength = this->diagonalsShifts.getSize(); + const IndexType bufferRowsCount( 128 ); + const size_t bufferSize = bufferRowsCount * maxRowLength; + Containers::Vector< RHSRealType, RHSDeviceType, RHSIndexType, RHSRealAllocatorType > matrixValuesBuffer( bufferSize ); + Containers::Vector< RHSIndexType, RHSDeviceType, RHSIndexType, RHSIndexAllocatorType > matrixColumnsBuffer( bufferSize ); + Containers::Vector< RealType, DeviceType, IndexType, RealAllocatorType > thisValuesBuffer( bufferSize ); + Containers::Vector< IndexType, DeviceType, IndexType, IndexAllocatorType > thisColumnsBuffer( bufferSize ); + auto matrixValuesBuffer_view = matrixValuesBuffer.getView(); + auto thisValuesBuffer_view = thisValuesBuffer.getView(); + + IndexType baseRow( 0 ); + const IndexType rowsCount = this->getRows(); + while( baseRow < rowsCount ) + { + const IndexType lastRow = min( baseRow + bufferRowsCount, rowsCount ); + + //// + // Copy matrix elements into buffer + auto f1 = [=] __cuda_callable__ ( RHSIndexType rowIdx, RHSIndexType localIdx, RHSIndexType columnIndex, const RHSRealType& value, bool& compute ) mutable { + const IndexType bufferIdx = ( rowIdx - baseRow ) * maxRowLength + localIdx; + matrixValuesBuffer_view[ bufferIdx ] = value; + }; + matrix.forRows( baseRow, lastRow, f1 ); + + //// + // Copy the source matrix buffer to this matrix buffer + thisValuesBuffer_view = matrixValuesBuffer_view; + + //// + // Copy matrix elements from the buffer to the matrix + auto f2 = [=] __cuda_callable__ ( const IndexType rowIdx, const IndexType localIdx, const IndexType columnIndex, RealType& value, bool& compute ) mutable { + const IndexType bufferIdx = ( rowIdx - baseRow ) * maxRowLength + localIdx; + value = thisValuesBuffer_view[ bufferIdx ]; + }; + this->forRows( baseRow, lastRow, f2 ); + baseRow += bufferRowsCount; + } + } + } + return *this; +} + +template< typename Real, + typename Device, + typename Index, + bool RowMajorOrder, + typename 
RealAllocator, + typename IndexAllocator > +void Multidiagonal< Real, Device, Index, RowMajorOrder, RealAllocator, IndexAllocator >::save( File& file ) const +{ + Matrix< Real, Device, Index >::save( file ); + file << diagonalsShifts; +} + +template< typename Real, + typename Device, + typename Index, + bool RowMajorOrder, + typename RealAllocator, + typename IndexAllocator > +void Multidiagonal< Real, Device, Index, RowMajorOrder, RealAllocator, IndexAllocator >::load( File& file ) +{ + Matrix< Real, Device, Index >::load( file ); + file >> this->diagonalsShifts; + this->hostDiagonalsShifts = this->diagonalsShifts; + const IndexType minShift = min( diagonalsShifts ); + IndexType nonemptyRows = min( this->getRows(), this->getColumns() ); + if( this->getRows() > this->getColumns() && minShift < 0 ) + nonemptyRows = min( this->getRows(), nonemptyRows - minShift ); + this->indexer.set( this->getRows(), this->getColumns(), diagonalsShifts.getSize(), nonemptyRows ); + this->view = this->getView(); +} + +template< typename Real, + typename Device, + typename Index, + bool RowMajorOrder, + typename RealAllocator, + typename IndexAllocator > +void Multidiagonal< Real, Device, Index, RowMajorOrder, RealAllocator, IndexAllocator >::save( const String& fileName ) const +{ + Object::save( fileName ); +} + +template< typename Real, + typename Device, + typename Index, + bool RowMajorOrder, + typename RealAllocator, + typename IndexAllocator > +void Multidiagonal< Real, Device, Index, RowMajorOrder, RealAllocator, IndexAllocator >::load( const String& fileName ) +{ + Object::load( fileName ); +} + +template< typename Real, + typename Device, + typename Index, + bool RowMajorOrder, + typename RealAllocator, + typename IndexAllocator > +void +Multidiagonal< Real, Device, Index, RowMajorOrder, RealAllocator, IndexAllocator >:: +print( std::ostream& str ) const +{ + this->view.print( str ); +} + +template< typename Real, + typename Device, + typename Index, + bool RowMajorOrder, + typename RealAllocator, + typename IndexAllocator > +auto +Multidiagonal< Real, Device, Index, RowMajorOrder, RealAllocator, IndexAllocator >:: +getIndexer() const -> const IndexerType& +{ + return this->indexer; +} + +template< typename Real, + typename Device, + typename Index, + bool RowMajorOrder, + typename RealAllocator, + typename IndexAllocator > +auto +Multidiagonal< Real, Device, Index, RowMajorOrder, RealAllocator, IndexAllocator >:: +getIndexer() -> IndexerType& +{ + return this->indexer; +} + +template< typename Real, + typename Device, + typename Index, + bool RowMajorOrder, + typename RealAllocator, + typename IndexAllocator > +__cuda_callable__ +Index Multidiagonal< Real, Device, Index, RowMajorOrder, RealAllocator, IndexAllocator >:: +getElementIndex( const IndexType row, const IndexType column ) const +{ + IndexType localIdx = column - row; + if( row > 0 ) + localIdx++; + + TNL_ASSERT_GE( localIdx, 0, "" ); + TNL_ASSERT_LT( localIdx, 3, "" ); + + return this->indexer.getGlobalIndex( row, localIdx ); +} + +template< typename Real, + typename Device, + typename Index, + bool RowMajorOrder, + typename RealAllocator, + typename IndexAllocator > +__cuda_callable__ +Index +Multidiagonal< Real, Device, Index, RowMajorOrder, RealAllocator, IndexAllocator >:: +getPaddingIndex() const +{ + return this->view.getPaddingIndex(); +} + +/* +template<> +class MultidiagonalDeviceDependentCode< Devices::Host > +{ + public: + + typedef Devices::Host Device; + + template< typename Index > + __cuda_callable__ + static Index 
getElementIndex( const Index rows, + const Index row, + const Index column ) + { + return 2*row + column; + } + + template< typename Vector, + typename Index, + typename ValuesType > + __cuda_callable__ + static typename Vector::RealType rowVectorProduct( const Index rows, + const ValuesType& values, + const Index row, + const Vector& vector ) + { + if( row == 0 ) + return vector[ 0 ] * values[ 0 ] + + vector[ 1 ] * values[ 1 ]; + Index i = 3 * row; + if( row == rows - 1 ) + return vector[ row - 1 ] * values[ i - 1 ] + + vector[ row ] * values[ i ]; + return vector[ row - 1 ] * values[ i - 1 ] + + vector[ row ] * values[ i ] + + vector[ row + 1 ] * values[ i + 1 ]; + } + + template< typename Real, + typename Index, + typename InVector, + typename OutVector > + static void vectorProduct( const Multidiagonal< Real, Device, Index, RowMajorOrder, RealAllocator, IndexAllocator >& matrix, + const InVector& inVector, + OutVector& outVector ) + { +#ifdef HAVE_OPENMP +#pragma omp parallel for if( Devices::Host::isOMPEnabled() ) +#endif + for( Index row = 0; row < matrix.getRows(); row ++ ) + outVector[ row ] = matrix.rowVectorProduct( row, inVector ); + } +}; + +template<> +class MultidiagonalDeviceDependentCode< Devices::Cuda > +{ + public: + + typedef Devices::Cuda Device; + + template< typename Index > + __cuda_callable__ + static Index getElementIndex( const Index rows, + const Index row, + const Index column ) + { + return ( column - row + 1 )*rows + row - 1; + } + + template< typename Vector, + typename Index, + typename ValuesType > + __cuda_callable__ + static typename Vector::RealType rowVectorProduct( const Index rows, + const ValuesType& values, + const Index row, + const Vector& vector ) + { + if( row == 0 ) + return vector[ 0 ] * values[ 0 ] + + vector[ 1 ] * values[ rows - 1 ]; + Index i = row - 1; + if( row == rows - 1 ) + return vector[ row - 1 ] * values[ i ] + + vector[ row ] * values[ i + rows ]; + return vector[ row - 1 ] * values[ i ] + + vector[ row ] * values[ i + rows ] + + vector[ row + 1 ] * values[ i + 2*rows ]; + } + + template< typename Real, + typename Index, + typename InVector, + typename OutVector > + static void vectorProduct( const Multidiagonal< Real, Device, Index, RowMajorOrder, RealAllocator, IndexAllocator >& matrix, + const InVector& inVector, + OutVector& outVector ) + { + MatrixVectorProductCuda( matrix, inVector, outVector ); + } +}; + */ + +} // namespace Matrices +} // namespace TNL diff --git a/src/TNL/Matrices/MultidiagonalMatrixRowView.h b/src/TNL/Matrices/MultidiagonalMatrixRowView.h new file mode 100644 index 0000000000000000000000000000000000000000..0825d6fb365ebd6552ee033d41a1fe208219a14e --- /dev/null +++ b/src/TNL/Matrices/MultidiagonalMatrixRowView.h @@ -0,0 +1,64 @@ +/*************************************************************************** + MultidiagonalMatrixRowView.h - description + ------------------- + begin : Jan 11, 2020 + copyright : (C) 2020 by oberhuber + email : tomas.oberhuber@fjfi.cvut.cz + ***************************************************************************/ + +/* See Copyright Notice in tnl/Copyright */ + +#pragma once + +namespace TNL { +namespace Matrices { + +template< typename ValuesView, + typename Indexer, + typename DiagonalsShiftsView_ > +class MultidiagonalMatrixRowView +{ + public: + + using RealType = typename ValuesView::RealType; + using IndexType = typename ValuesView::IndexType; + using ValuesViewType = ValuesView; + using IndexerType = Indexer; + using DiagonalsShiftsView = DiagonalsShiftsView_; + + 
__cuda_callable__ + MultidiagonalMatrixRowView( const IndexType rowIdx, + const DiagonalsShiftsView& diagonalsShifts, + const ValuesViewType& values, + const IndexerType& indexer); + + __cuda_callable__ + IndexType getSize() const; + + __cuda_callable__ + const IndexType getColumnIndex( const IndexType localIdx ) const; + + __cuda_callable__ + const RealType& getValue( const IndexType localIdx ) const; + + __cuda_callable__ + RealType& getValue( const IndexType localIdx ); + + __cuda_callable__ + void setElement( const IndexType localIdx, + const RealType& value ); + protected: + + IndexType rowIdx; + + DiagonalsShiftsView diagonalsShifts; + + ValuesViewType values; + + Indexer indexer; +}; + +} // namespace Matrices +} // namespace TNL + +#include <TNL/Matrices/MultidiagonalMatrixRowView.hpp> diff --git a/src/TNL/Matrices/MultidiagonalMatrixRowView.hpp b/src/TNL/Matrices/MultidiagonalMatrixRowView.hpp new file mode 100644 index 0000000000000000000000000000000000000000..855b8463aa13eb5d21bee65923704d2be1d897ba --- /dev/null +++ b/src/TNL/Matrices/MultidiagonalMatrixRowView.hpp @@ -0,0 +1,76 @@ +/*************************************************************************** + MultidiagonalMatrixRowView.hpp - description + ------------------- + begin : Jan 11, 2020 + copyright : (C) 2020 by oberhuber + email : tomas.oberhuber@fjfi.cvut.cz + ***************************************************************************/ + +/* See Copyright Notice in tnl/Copyright */ + +#pragma once + +namespace TNL { +namespace Matrices { + +template< typename ValuesView, typename Indexer, typename DiagonalsShiftsView > +__cuda_callable__ +MultidiagonalMatrixRowView< ValuesView, Indexer, DiagonalsShiftsView >:: +MultidiagonalMatrixRowView( const IndexType rowIdx, + const DiagonalsShiftsView& diagonalsShifts, + const ValuesViewType& values, + const IndexerType& indexer ) +: rowIdx( rowIdx ), diagonalsShifts( diagonalsShifts ), values( values ), indexer( indexer ) +{ +} + +template< typename ValuesView, typename Indexer, typename DiagonalsShiftsView > +__cuda_callable__ +auto +MultidiagonalMatrixRowView< ValuesView, Indexer, DiagonalsShiftsView >:: +getSize() const -> IndexType +{ + return diagonalsShifts.getSize();//indexer.getRowSize( rowIdx ); +} + +template< typename ValuesView, typename Indexer, typename DiagonalsShiftsView > +__cuda_callable__ +auto +MultidiagonalMatrixRowView< ValuesView, Indexer, DiagonalsShiftsView >:: +getColumnIndex( const IndexType localIdx ) const -> const IndexType +{ + TNL_ASSERT_GE( localIdx, 0, "" ); + TNL_ASSERT_LT( localIdx, indexer.getDiagonals(), "" ); + return rowIdx + diagonalsShifts[ localIdx ]; +} + +template< typename ValuesView, typename Indexer, typename DiagonalsShiftsView > +__cuda_callable__ +auto +MultidiagonalMatrixRowView< ValuesView, Indexer, DiagonalsShiftsView >:: +getValue( const IndexType localIdx ) const -> const RealType& +{ + return this->values[ this->indexer.getGlobalIndex( rowIdx, localIdx ) ]; +} + +template< typename ValuesView, typename Indexer, typename DiagonalsShiftsView > +__cuda_callable__ +auto +MultidiagonalMatrixRowView< ValuesView, Indexer, DiagonalsShiftsView >:: +getValue( const IndexType localIdx ) -> RealType& +{ + return this->values[ this->indexer.getGlobalIndex( rowIdx, localIdx ) ]; +} + +template< typename ValuesView, typename Indexer, typename DiagonalsShiftsView > +__cuda_callable__ +void +MultidiagonalMatrixRowView< ValuesView, Indexer, DiagonalsShiftsView >:: +setElement( const IndexType localIdx, + const RealType& value ) +{ + 
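+ // note: localIdx selects the localIdx-th diagonal of this row and the indexer maps ( rowIdx, localIdx ) to the element's position in the values array; unlike setElement on the whole matrix view, no bounds checking is performed here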
this->values[ indexer.getGlobalIndex( rowIdx, localIdx ) ] = value; +} + +} // namespace Matrices +} // namespace TNL diff --git a/src/TNL/Matrices/MultidiagonalMatrixView.h b/src/TNL/Matrices/MultidiagonalMatrixView.h new file mode 100644 index 0000000000000000000000000000000000000000..97ff94f85c6b81cb06b6832c1836ace71a7fdbbd --- /dev/null +++ b/src/TNL/Matrices/MultidiagonalMatrixView.h @@ -0,0 +1,187 @@ +/*************************************************************************** + MultidiagonalMatrixView.h - description + ------------------- + begin : Jan 11, 2020 + copyright : (C) 2020 by Tomas Oberhuber + email : tomas.oberhuber@fjfi.cvut.cz + ***************************************************************************/ + +/* See Copyright Notice in tnl/Copyright */ + +#pragma once + +#include <TNL/Matrices/MatrixView.h> +#include <TNL/Containers/Vector.h> +#include <TNL/Matrices/MultidiagonalMatrixRowView.h> +#include <TNL/Containers/Segments/Ellpack.h> +#include <TNL/Matrices/details/MultidiagonalMatrixIndexer.h> + +namespace TNL { +namespace Matrices { + +template< typename Real = double, + typename Device = Devices::Host, + typename Index = int, + bool RowMajorOrder = std::is_same< Device, Devices::Host >::value > +class MultidiagonalMatrixView : public MatrixView< Real, Device, Index > +{ + public: + using RealType = Real; + using DeviceType = Device; + using IndexType = Index; + using BaseType = MatrixView< Real, Device, Index >; + //using DiagonalsShiftsType = Containers::Vector< IndexType, DeviceType, IndexType >; + using DiagonalsShiftsView = Containers::VectorView< IndexType, DeviceType, IndexType >; + //using HostDiagonalsShiftsType = Containers::Vector< IndexType, Devices::Host, IndexType >; + using HostDiagonalsShiftsView = Containers::VectorView< IndexType, Devices::Host, IndexType >; + using IndexerType = details::MultidiagonalMatrixIndexer< IndexType, RowMajorOrder >; + using ValuesViewType = typename BaseType::ValuesView; + using ViewType = MultidiagonalMatrixView< Real, Device, Index, RowMajorOrder >; + using ConstViewType = MultidiagonalMatrixView< typename std::add_const< Real >::type, Device, Index, RowMajorOrder >; + using RowView = MultidiagonalMatrixRowView< ValuesViewType, IndexerType, DiagonalsShiftsView >; + + // TODO: remove this - it is here only for compatibility with original matrix implementation + typedef Containers::Vector< IndexType, DeviceType, IndexType > CompressedRowLengthsVector; + typedef Containers::VectorView< IndexType, DeviceType, IndexType > CompressedRowLengthsVectorView; + typedef typename CompressedRowLengthsVectorView::ConstViewType ConstCompressedRowLengthsVectorView; + + template< typename _Real = Real, + typename _Device = Device, + typename _Index = Index, + bool RowMajorOrder_ = std::is_same< Device, Devices::Host >::value > + using Self = MultidiagonalMatrixView< _Real, _Device, _Index, RowMajorOrder_ >; + + MultidiagonalMatrixView(); + + MultidiagonalMatrixView( const ValuesViewType& values, + const DiagonalsShiftsView& diagonalsShifts, + const HostDiagonalsShiftsView& hostDiagonalsShifts, + const IndexerType& indexer ); + + ViewType getView(); + + ConstViewType getConstView() const; + + static String getSerializationType(); + + virtual String getSerializationTypeVirtual() const; + + __cuda_callable__ + const IndexType& getDiagonalsCount() const; + + template< typename Vector > + void getCompressedRowLengths( Vector& rowLengths ) const; + + IndexType getNonemptyRowsCount() const; + + [[deprecated]] + IndexType getRowLength( 
const IndexType row ) const; + + IndexType getMaxRowLength() const; + + IndexType getNumberOfNonzeroMatrixElements() const; + + template< typename Real_, typename Device_, typename Index_, bool RowMajorOrder_ > + bool operator == ( const MultidiagonalMatrixView< Real_, Device_, Index_, RowMajorOrder_ >& matrix ) const; + + template< typename Real_, typename Device_, typename Index_, bool RowMajorOrder_ > + bool operator != ( const MultidiagonalMatrixView< Real_, Device_, Index_, RowMajorOrder_ >& matrix ) const; + + __cuda_callable__ + RowView getRow( const IndexType& rowIdx ); + + __cuda_callable__ + const RowView getRow( const IndexType& rowIdx ) const; + + void setValue( const RealType& v ); + + void setElement( const IndexType row, + const IndexType column, + const RealType& value ); + + void addElement( const IndexType row, + const IndexType column, + const RealType& value, + const RealType& thisElementMultiplicator = 1.0 ); + + RealType getElement( const IndexType row, + const IndexType column ) const; + + MultidiagonalMatrixView& operator=( const MultidiagonalMatrixView& view ); + + template< typename Fetch, typename Reduce, typename Keep, typename FetchReal > + void rowsReduction( IndexType first, IndexType last, Fetch& fetch, Reduce& reduce, Keep& keep, const FetchReal& zero ) const; + + template< typename Fetch, typename Reduce, typename Keep, typename FetchReal > + void allRowsReduction( Fetch& fetch, Reduce& reduce, Keep& keep, const FetchReal& zero ) const; + + template< typename Function > + void forRows( IndexType first, IndexType last, Function& function ) const; + + template< typename Function > + void forRows( IndexType first, IndexType last, Function& function ); + + template< typename Function > + void forAllRows( Function& function ) const; + + template< typename Function > + void forAllRows( Function& function ); + + template< typename Vector > + __cuda_callable__ + typename Vector::RealType rowVectorProduct( const IndexType row, + const Vector& vector ) const; + + template< typename InVector, + typename OutVector > + void vectorProduct( const InVector& inVector, + OutVector& outVector ) const; + + template< typename Real_, typename Device_, typename Index_, bool RowMajorOrder_ > + void addMatrix( const MultidiagonalMatrixView< Real_, Device_, Index_, RowMajorOrder_ >& matrix, + const RealType& matrixMultiplicator = 1.0, + const RealType& thisMatrixMultiplicator = 1.0 ); + + template< typename Real2, typename Index2 > + void getTransposition( const MultidiagonalMatrixView< Real2, Device, Index2 >& matrix, + const RealType& matrixMultiplicator = 1.0 ); + + template< typename Vector1, typename Vector2 > + __cuda_callable__ + void performSORIteration( const Vector1& b, + const IndexType row, + Vector2& x, + const RealType& omega = 1.0 ) const; + + void save( File& file ) const; + + void save( const String& fileName ) const; + + void print( std::ostream& str ) const; + + __cuda_callable__ + const IndexerType& getIndexer() const; + + __cuda_callable__ + IndexerType& getIndexer(); + + __cuda_callable__ + IndexType getPaddingIndex() const; + + protected: + + __cuda_callable__ + IndexType getElementIndex( const IndexType row, + const IndexType localIdx ) const; + + DiagonalsShiftsView diagonalsShifts; + + HostDiagonalsShiftsView hostDiagonalsShifts; + + IndexerType indexer; +}; + +} // namespace Matrices +} // namespace TNL + +#include <TNL/Matrices/MultidiagonalMatrixView.hpp> diff --git a/src/TNL/Matrices/MultidiagonalMatrixView.hpp 
b/src/TNL/Matrices/MultidiagonalMatrixView.hpp new file mode 100644 index 0000000000000000000000000000000000000000..ecfe1c1d8310577812aa2e46c9cc1fc6d8e0035f --- /dev/null +++ b/src/TNL/Matrices/MultidiagonalMatrixView.hpp @@ -0,0 +1,737 @@ +/*************************************************************************** + MultidiagonalMatrixView.hpp - description + ------------------- + begin : Jan 11, 2020 + copyright : (C) 2020 by Tomas Oberhuber + email : tomas.oberhuber@fjfi.cvut.cz + ***************************************************************************/ + +/* See Copyright Notice in tnl/Copyright */ + +#pragma once + +#include <TNL/Assert.h> +#include <TNL/Matrices/MultidiagonalMatrixView.h> +#include <TNL/Exceptions/NotImplementedError.h> + +namespace TNL { +namespace Matrices { + +template< typename Real, + typename Device, + typename Index, + bool RowMajorOrder > +MultidiagonalMatrixView< Real, Device, Index, RowMajorOrder >:: +MultidiagonalMatrixView() +{ +} + +template< typename Real, + typename Device, + typename Index, + bool RowMajorOrder > +MultidiagonalMatrixView< Real, Device, Index, RowMajorOrder >:: +MultidiagonalMatrixView( const ValuesViewType& values, + const DiagonalsShiftsView& diagonalsShifts, + const HostDiagonalsShiftsView& hostDiagonalsShifts, + const IndexerType& indexer ) +: MatrixView< Real, Device, Index >( indexer.getRows(), indexer.getColumns(), values ), + diagonalsShifts( diagonalsShifts ), + hostDiagonalsShifts( hostDiagonalsShifts ), + indexer( indexer ) +{ +} + +template< typename Real, + typename Device, + typename Index, + bool RowMajorOrder > +auto +MultidiagonalMatrixView< Real, Device, Index, RowMajorOrder >:: +getView() -> ViewType +{ + return ViewType( const_cast< MultidiagonalMatrixView* >( this )->values.getView(), + const_cast< MultidiagonalMatrixView* >( this )->diagonalsShifts.getView(), + const_cast< MultidiagonalMatrixView* >( this )->hostDiagonalsShifts.getView(), + indexer ); +} + +template< typename Real, + typename Device, + typename Index, + bool RowMajorOrder > +auto +MultidiagonalMatrixView< Real, Device, Index, RowMajorOrder >:: +getConstView() const -> ConstViewType +{ + return ConstViewType( this->values.getConstView(), + this->diagonalsShifts.getConstView(), + this->hostDiagonalsShifts.getConstView(), + indexer ); +} + +template< typename Real, + typename Device, + typename Index, + bool RowMajorOrder > +String +MultidiagonalMatrixView< Real, Device, Index, RowMajorOrder >:: +getSerializationType() +{ + return String( "Matrices::Multidiagonal< " ) + + TNL::getSerializationType< RealType >() + ", [any_device], " + + TNL::getSerializationType< IndexType >() + ", " + + ( RowMajorOrder ? 
"true" : "false" ) + ", [any_allocator] >"; +} + +template< typename Real, + typename Device, + typename Index, + bool RowMajorOrder > +String +MultidiagonalMatrixView< Real, Device, Index, RowMajorOrder >:: +getSerializationTypeVirtual() const +{ + return this->getSerializationType(); +} + +template< typename Real, + typename Device, + typename Index, + bool RowMajorOrder > +__cuda_callable__ +const Index& +MultidiagonalMatrixView< Real, Device, Index, RowMajorOrder >:: +getDiagonalsCount() const +{ + return this->diagonalsShifts.getSize(); +} + +template< typename Real, + typename Device, + typename Index, + bool RowMajorOrder > + template< typename Vector > +void +MultidiagonalMatrixView< Real, Device, Index, RowMajorOrder >:: +getCompressedRowLengths( Vector& rowLengths ) const +{ + rowLengths.setSize( this->getRows() ); + rowLengths = 0; + auto rowLengths_view = rowLengths.getView(); + auto fetch = [] __cuda_callable__ ( IndexType row, IndexType column, const RealType& value ) -> IndexType { + return ( value != 0.0 ); + }; + auto reduce = [] __cuda_callable__ ( IndexType& aux, const IndexType a ) { + aux += a; + }; + auto keep = [=] __cuda_callable__ ( const IndexType rowIdx, const IndexType value ) mutable { + rowLengths_view[ rowIdx ] = value; + }; + this->allRowsReduction( fetch, reduce, keep, 0 ); +} + +template< typename Real, + typename Device, + typename Index, + bool RowMajorOrder > +Index +MultidiagonalMatrixView< Real, Device, Index, RowMajorOrder >:: +getNonemptyRowsCount() const +{ + return this->indexer.getNonemptyRowsCount(); +} + +template< typename Real, + typename Device, + typename Index, + bool RowMajorOrder > +Index +MultidiagonalMatrixView< Real, Device, Index, RowMajorOrder >:: +getRowLength( const IndexType row ) const +{ + return this->diagonalsShifts.getSize(); +} + +template< typename Real, + typename Device, + typename Index, + bool RowMajorOrder > +Index +MultidiagonalMatrixView< Real, Device, Index, RowMajorOrder >:: +getMaxRowLength() const +{ + return this->diagonalsShifts.getSize(); +} + +template< typename Real, + typename Device, + typename Index, + bool RowMajorOrder > +Index +MultidiagonalMatrixView< Real, Device, Index, RowMajorOrder >:: +getNumberOfNonzeroMatrixElements() const +{ + const auto values_view = this->values.getConstView(); + auto fetch = [=] __cuda_callable__ ( const IndexType i ) -> IndexType { + return ( values_view[ i ] != 0.0 ); + }; + return Algorithms::Reduction< DeviceType >::reduce( this->values.getSize(), std::plus<>{}, fetch, 0 ); +} + +template< typename Real, + typename Device, + typename Index, + bool RowMajorOrder > + template< typename Real_, typename Device_, typename Index_, bool RowMajorOrder_ > +bool +MultidiagonalMatrixView< Real, Device, Index, RowMajorOrder >:: +operator == ( const MultidiagonalMatrixView< Real_, Device_, Index_, RowMajorOrder_ >& matrix ) const +{ + if( RowMajorOrder == RowMajorOrder_ ) + return this->values == matrix.values; + else + { + TNL_ASSERT( false, "TODO" ); + } +} + +template< typename Real, + typename Device, + typename Index, + bool RowMajorOrder > + template< typename Real_, typename Device_, typename Index_, bool RowMajorOrder_ > +bool +MultidiagonalMatrixView< Real, Device, Index, RowMajorOrder >:: +operator != ( const MultidiagonalMatrixView< Real_, Device_, Index_, RowMajorOrder_ >& matrix ) const +{ + return ! 
this->operator==( matrix ); +} + +template< typename Real, + typename Device, + typename Index, + bool RowMajorOrder > +void +MultidiagonalMatrixView< Real, Device, Index, RowMajorOrder >:: +setValue( const RealType& v ) +{ + // we do not use this->values = v here because it would set even the elements 'outside' the matrix + // and the method getNumberOfNonzeroMatrixElements would then give wrong results + const RealType newValue = v; + auto f = [=] __cuda_callable__ ( const IndexType& rowIdx, const IndexType& localIdx, const IndexType columnIdx, RealType& value, bool& compute ) mutable { + value = newValue; + }; + this->forAllRows( f ); +} + +template< typename Real, + typename Device, + typename Index, + bool RowMajorOrder > +__cuda_callable__ +auto +MultidiagonalMatrixView< Real, Device, Index, RowMajorOrder >:: +getRow( const IndexType& rowIdx ) const -> const RowView +{ + return RowView( rowIdx, this->diagonalsShifts.getView(), this->values.getView(), this->indexer ); +} + +template< typename Real, + typename Device, + typename Index, + bool RowMajorOrder > +__cuda_callable__ +auto +MultidiagonalMatrixView< Real, Device, Index, RowMajorOrder >:: +getRow( const IndexType& rowIdx ) -> RowView +{ + return RowView( rowIdx, this->diagonalsShifts.getView(), this->values.getView(), this->indexer ); +} + +template< typename Real, + typename Device, + typename Index, + bool RowMajorOrder > +void +MultidiagonalMatrixView< Real, Device, Index, RowMajorOrder >:: +setElement( const IndexType row, const IndexType column, const RealType& value ) +{ + TNL_ASSERT_GE( row, 0, "" ); + TNL_ASSERT_LT( row, this->getRows(), "" ); + TNL_ASSERT_GE( column, 0, "" ); + TNL_ASSERT_LT( column, this->getColumns(), "" ); + + for( IndexType i = 0; i < hostDiagonalsShifts.getSize(); i++ ) + if( row + hostDiagonalsShifts[ i ] == column ) + { + this->values.setElement( this->getElementIndex( row, i ), value ); + return; + } + if( value != 0.0 ) + { + std::stringstream msg; + msg << "Wrong matrix element coordinates ( " << row << ", " << column << " ) in multidiagonal matrix."; + throw std::logic_error( msg.str() ); + } +} + +template< typename Real, + typename Device, + typename Index, + bool RowMajorOrder > +void +MultidiagonalMatrixView< Real, Device, Index, RowMajorOrder >:: +addElement( const IndexType row, + const IndexType column, + const RealType& value, + const RealType& thisElementMultiplicator ) +{ + TNL_ASSERT_GE( row, 0, "" ); + TNL_ASSERT_LT( row, this->getRows(), "" ); + TNL_ASSERT_GE( column, 0, "" ); + TNL_ASSERT_LT( column, this->getColumns(), "" ); + + for( IndexType i = 0; i < hostDiagonalsShifts.getSize(); i++ ) + if( row + hostDiagonalsShifts[ i ] == column ) + { + const Index idx = this->getElementIndex( row, i ); + this->values.setElement( idx, thisElementMultiplicator * this->values.getElement( idx ) + value ); + return; + } + if( value != 0.0 ) + { + std::stringstream msg; + msg << "Wrong matrix element coordinates ( " << row << ", " << column << " ) in multidiagonal matrix."; + throw std::logic_error( msg.str() ); + } +} + +template< typename Real, + typename Device, + typename Index, + bool RowMajorOrder > +Real +MultidiagonalMatrixView< Real, Device, Index, RowMajorOrder >:: +getElement( const IndexType row, const IndexType column ) const +{ + TNL_ASSERT_GE( row, 0, "" ); + TNL_ASSERT_LT( row, this->getRows(), "" ); + TNL_ASSERT_GE( column, 0, "" ); + TNL_ASSERT_LT( column, this->getColumns(), "" ); + + for( IndexType i = 0; i < hostDiagonalsShifts.getSize(); i++ ) + if( row + hostDiagonalsShifts[ i ] == column ) + return
this->values.getElement( this->getElementIndex( row, i ) ); + return 0.0; +} + +template< typename Real, + typename Device, + typename Index, + bool RowMajorOrder > +MultidiagonalMatrixView< Real, Device, Index, RowMajorOrder >& +MultidiagonalMatrixView< Real, Device, Index, RowMajorOrder >:: +operator=( const MultidiagonalMatrixView& view ) +{ + MatrixView< Real, Device, Index >::operator=( view ); + this->diagonalsShifts.bind( view.diagonalsShifts ); + this->hostDiagonalsShifts.bind( view.hostDiagonalsShifts ); + this->indexer = view.indexer; + return *this; +} + +template< typename Real, + typename Device, + typename Index, + bool RowMajorOrder > + template< typename Fetch, typename Reduce, typename Keep, typename FetchReal > +void +MultidiagonalMatrixView< Real, Device, Index, RowMajorOrder >:: +rowsReduction( IndexType first, IndexType last, Fetch& fetch, Reduce& reduce, Keep& keep, const FetchReal& zero_ ) const +{ + using Real_ = decltype( fetch( IndexType(), IndexType(), RealType() ) ); + const auto values_view = this->values.getConstView(); + const auto diagonalsShifts_view = this->diagonalsShifts.getConstView(); + const IndexType diagonalsCount = this->diagonalsShifts.getSize(); + const IndexType columns = this->getColumns(); + const auto indexer = this->indexer; + const auto zero = zero_; + auto f = [=] __cuda_callable__ ( IndexType rowIdx ) mutable { + Real_ sum( zero ); + for( IndexType localIdx = 0; localIdx < diagonalsCount; localIdx++ ) + { + const IndexType columnIdx = rowIdx + diagonalsShifts_view[ localIdx ]; + if( columnIdx >= 0 && columnIdx < columns ) + reduce( sum, fetch( rowIdx, columnIdx, values_view[ indexer.getGlobalIndex( rowIdx, localIdx ) ] ) ); + } + keep( rowIdx, sum ); + }; + Algorithms::ParallelFor< DeviceType >::exec( first, last, f ); +} + +template< typename Real, + typename Device, + typename Index, + bool RowMajorOrder > + template< typename Fetch, typename Reduce, typename Keep, typename FetchReal > +void +MultidiagonalMatrixView< Real, Device, Index, RowMajorOrder >:: +allRowsReduction( Fetch& fetch, Reduce& reduce, Keep& keep, const FetchReal& zero ) const +{ + this->rowsReduction( 0, this->indexer.getNonemptyRowsCount(), fetch, reduce, keep, zero ); +} + +template< typename Real, + typename Device, + typename Index, + bool RowMajorOrder > + template< typename Function > +void +MultidiagonalMatrixView< Real, Device, Index, RowMajorOrder >:: +forRows( IndexType first, IndexType last, Function& function ) const +{ + const auto values_view = this->values.getConstView(); + const auto diagonalsShifts_view = this->diagonalsShifts.getConstView(); + const IndexType diagonalsCount = this->diagonalsShifts.getSize(); + const IndexType columns = this->getColumns(); + const auto indexer = this->indexer; + bool compute( true ); + auto f = [=] __cuda_callable__ ( IndexType rowIdx ) mutable { + for( IndexType localIdx = 0; localIdx < diagonalsCount; localIdx++ ) + { + const IndexType columnIdx = rowIdx + diagonalsShifts_view[ localIdx ]; + if( columnIdx >= 0 && columnIdx < columns ) + function( rowIdx, localIdx, columnIdx, values_view[ indexer.getGlobalIndex( rowIdx, localIdx ) ], compute ); + } + }; + Algorithms::ParallelFor< DeviceType >::exec( first, last, f ); +} + +template< typename Real, + typename Device, + typename Index, + bool RowMajorOrder > + template< typename Function > +void +MultidiagonalMatrixView< Real, Device, Index, RowMajorOrder >:: +forRows( IndexType first, IndexType last, Function& function ) +{ + auto values_view = this->values.getView(); 
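+ // note: this non-const overload passes a mutable reference to each element value, so the function may modify the matrix in place; the function is called as function( rowIdx, localIdx, columnIdx, value, compute ) and setting compute to false stops the traversal of the current row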
+ const auto diagonalsShifts_view = this->diagonalsShifts.getConstView(); + const IndexType diagonalsCount = this->diagonalsShifts.getSize(); + const IndexType columns = this->getColumns(); + const auto indexer = this->indexer; + bool compute( true ); + auto f = [=] __cuda_callable__ ( IndexType rowIdx ) mutable { + for( IndexType localIdx = 0; localIdx < diagonalsCount && compute; localIdx++ ) + { + const IndexType columnIdx = rowIdx + diagonalsShifts_view[ localIdx ]; + if( columnIdx >= 0 && columnIdx < columns ) + function( rowIdx, localIdx, columnIdx, values_view[ indexer.getGlobalIndex( rowIdx, localIdx ) ], compute ); + } + }; + Algorithms::ParallelFor< DeviceType >::exec( first, last, f ); +} + +template< typename Real, + typename Device, + typename Index, + bool RowMajorOrder > + template< typename Function > +void +MultidiagonalMatrixView< Real, Device, Index, RowMajorOrder >:: +forAllRows( Function& function ) const +{ + this->forRows( 0, this->indexer.getNonemptyRowsCount(), function ); +} + +template< typename Real, + typename Device, + typename Index, + bool RowMajorOrder > + template< typename Function > +void +MultidiagonalMatrixView< Real, Device, Index, RowMajorOrder >:: +forAllRows( Function& function ) +{ + this->forRows( 0, this->indexer.getNonemptyRowsCount(), function ); +} + +template< typename Real, + typename Device, + typename Index, + bool RowMajorOrder > +template< typename Vector > +__cuda_callable__ +typename Vector::RealType +MultidiagonalMatrixView< Real, Device, Index, RowMajorOrder >:: +rowVectorProduct( const IndexType row, const Vector& vector ) const +{ +} + +template< typename Real, + typename Device, + typename Index, + bool RowMajorOrder > + template< typename InVector, + typename OutVector > +void +MultidiagonalMatrixView< Real, Device, Index, RowMajorOrder >:: +vectorProduct( const InVector& inVector, OutVector& outVector ) const +{ + TNL_ASSERT_EQ( this->getColumns(), inVector.getSize(), "Matrix columns do not fit with input vector." ); + TNL_ASSERT_EQ( this->getRows(), outVector.getSize(), "Matrix rows do not fit with output vector." ); + + const auto inVectorView = inVector.getConstView(); + auto outVectorView = outVector.getView(); + auto fetch = [=] __cuda_callable__ ( const IndexType& row, const IndexType& column, const RealType& value ) -> RealType { + return value * inVectorView[ column ]; + }; + auto reduction = [] __cuda_callable__ ( RealType& sum, const RealType& value ) { + sum += value; + }; + auto keeper = [=] __cuda_callable__ ( IndexType row, const RealType& value ) mutable { + outVectorView[ row ] = value; + }; + this->allRowsReduction( fetch, reduction, keeper, ( RealType ) 0.0 ); +} + +template< typename Real, + typename Device, + typename Index, + bool RowMajorOrder > + template< typename Real_, typename Device_, typename Index_, bool RowMajorOrder_ > +void +MultidiagonalMatrixView< Real, Device, Index, RowMajorOrder >:: +addMatrix( const MultidiagonalMatrixView< Real_, Device_, Index_, RowMajorOrder_ >& matrix, + const RealType& matrixMultiplicator, + const RealType& thisMatrixMultiplicator ) +{ + TNL_ASSERT_EQ( this->getRows(), matrix.getRows(), "Matrices rows are not equal." ); + TNL_ASSERT_EQ( this->getColumns(), matrix.getColumns(), "Matrices columns are not equal."
); + + /*if( RowMajorOrder == RowMajorOrder_ ) + { + if( thisMatrixMultiplicator == 1.0 ) + this->values += matrixMultiplicator * matrix.getValues(); + else + this->values = thisMatrixMultiplicator * this->values + matrixMultiplicator * matrix.getValues(); + } + else + { + const auto matrix_view = matrix; + const auto matrixMult = matrixMultiplicator; + const auto thisMult = thisMatrixMultiplicator; + auto add0 = [=] __cuda_callable__ ( const IndexType& rowIdx, const IndexType& localIdx, const IndexType& column, Real& value ) mutable { + value = matrixMult * matrix.getValues()[ matrix.getIndexer().getGlobalIndex( rowIdx, localIdx ) ]; + }; + auto add1 = [=] __cuda_callable__ ( const IndexType& rowIdx, const IndexType& localIdx, const IndexType& column, Real& value ) mutable { + value += matrixMult * matrix.getValues()[ matrix.getIndexer().getGlobalIndex( rowIdx, localIdx ) ]; + }; + auto addGen = [=] __cuda_callable__ ( const IndexType& rowIdx, const IndexType& localIdx, const IndexType& column, Real& value ) mutable { + value = thisMult * value + matrixMult * matrix.getValues()[ matrix.getIndexer().getGlobalIndex( rowIdx, localIdx ) ]; + }; + if( thisMult == 0.0 ) + this->forAllRows( add0 ); + else if( thisMult == 1.0 ) + this->forAllRows( add1 ); + else + this->forAllRows( addGen ); + }*/ +} + +#ifdef HAVE_CUDA +/*template< typename Real, + typename Real2, + typename Index, + typename Index2 > +__global__ void MultidiagonalTranspositionCudaKernel( const Multidiagonal< Real2, Devices::Cuda, Index2 >* inMatrix, + Multidiagonal< Real, Devices::Cuda, Index >* outMatrix, + const Real matrixMultiplicator, + const Index gridIdx ) +{ + const Index rowIdx = ( gridIdx * Cuda::getMaxGridSize() + blockIdx.x ) * blockDim.x + threadIdx.x; + if( rowIdx < inMatrix->getRows() ) + { + if( rowIdx > 0 ) + outMatrix->setElementFast( rowIdx-1, + rowIdx, + matrixMultiplicator * inMatrix->getElementFast( rowIdx, rowIdx-1 ) ); + outMatrix->setElementFast( rowIdx, + rowIdx, + matrixMultiplicator * inMatrix->getElementFast( rowIdx, rowIdx ) ); + if( rowIdx < inMatrix->getRows()-1 ) + outMatrix->setElementFast( rowIdx+1, + rowIdx, + matrixMultiplicator * inMatrix->getElementFast( rowIdx, rowIdx+1 ) ); + } +}*/ +#endif + +template< typename Real, + typename Device, + typename Index, + bool RowMajorOrder > + template< typename Real2, typename Index2 > +void +MultidiagonalMatrixView< Real, Device, Index, RowMajorOrder >:: +getTransposition( const MultidiagonalMatrixView< Real2, Device, Index2 >& matrix, + const RealType& matrixMultiplicator ) +{ + TNL_ASSERT( this->getRows() == matrix.getRows(), + std::cerr << "This matrix rows: " << this->getRows() << std::endl + << "That matrix rows: " << matrix.getRows() << std::endl ); + if( std::is_same< Device, Devices::Host >::value ) + { + const IndexType& rows = matrix.getRows(); + for( IndexType i = 1; i < rows; i++ ) + { + RealType aux = matrix. 
getElement( i, i - 1 ); + this->setElement( i, i - 1, matrix.getElement( i - 1, i ) ); + this->setElement( i, i, matrix.getElement( i, i ) ); + this->setElement( i - 1, i, aux ); + } + } + if( std::is_same< Device, Devices::Cuda >::value ) + { +#ifdef HAVE_CUDA + /*Multidiagonal* kernel_this = Cuda::passToDevice( *this ); + typedef Multidiagonal< Real2, Device, Index2 > InMatrixType; + InMatrixType* kernel_inMatrix = Cuda::passToDevice( matrix ); + dim3 cudaBlockSize( 256 ), cudaGridSize( Cuda::getMaxGridSize() ); + const IndexType cudaBlocks = roundUpDivision( matrix.getRows(), cudaBlockSize.x ); + const IndexType cudaGrids = roundUpDivision( cudaBlocks, Cuda::getMaxGridSize() ); + for( IndexType gridIdx = 0; gridIdx < cudaGrids; gridIdx++ ) + { + if( gridIdx == cudaGrids - 1 ) + cudaGridSize.x = cudaBlocks % Cuda::getMaxGridSize(); + MultidiagonalTranspositionCudaKernel<<< cudaGridSize, cudaBlockSize >>> + ( kernel_inMatrix, + kernel_this, + matrixMultiplicator, + gridIdx ); + } + Cuda::freeFromDevice( kernel_this ); + Cuda::freeFromDevice( kernel_inMatrix ); + TNL_CHECK_CUDA_DEVICE;*/ +#endif + } +} + +template< typename Real, + typename Device, + typename Index, + bool RowMajorOrder > + template< typename Vector1, typename Vector2 > +__cuda_callable__ +void +MultidiagonalMatrixView< Real, Device, Index, RowMajorOrder >:: +performSORIteration( const Vector1& b, + const IndexType row, + Vector2& x, + const RealType& omega ) const +{ + RealType sum( 0.0 ); + if( row > 0 ) + sum += this->getElementFast( row, row - 1 ) * x[ row - 1 ]; + if( row < this->getColumns() - 1 ) + sum += this->getElementFast( row, row + 1 ) * x[ row + 1 ]; + x[ row ] = ( 1.0 - omega ) * x[ row ] + omega / this->getElementFast( row, row ) * ( b[ row ] - sum ); +} + + +template< typename Real, + typename Device, + typename Index, + bool RowMajorOrder > +void MultidiagonalMatrixView< Real, Device, Index, RowMajorOrder >::save( File& file ) const +{ + MatrixView< Real, Device, Index >::save( file ); +} + +template< typename Real, + typename Device, + typename Index, + bool RowMajorOrder > +void +MultidiagonalMatrixView< Real, Device, Index, RowMajorOrder >:: +save( const String& fileName ) const +{ + Object::save( fileName ); +} + +template< typename Real, + typename Device, + typename Index, + bool RowMajorOrder > +void MultidiagonalMatrixView< Real, Device, Index, RowMajorOrder >::print( std::ostream& str ) const +{ + for( IndexType rowIdx = 0; rowIdx < this->getRows(); rowIdx++ ) + { + str <<"Row: " << rowIdx << " -> "; + for( IndexType localIdx = 0; localIdx < this->hostDiagonalsShifts.getSize(); localIdx++ ) + { + const IndexType columnIdx = rowIdx + this->hostDiagonalsShifts[ localIdx ]; + if( columnIdx >= 0 && columnIdx < this->columns ) + { + auto v = this->values.getElement( this->indexer.getGlobalIndex( rowIdx, localIdx ) ); + if( v ) + str << " Col:" << columnIdx << "->" << v << "\t"; + } + } + str << std::endl; + } +} + +template< typename Real, + typename Device, + typename Index, + bool RowMajorOrder > +__cuda_callable__ +auto +MultidiagonalMatrixView< Real, Device, Index, RowMajorOrder >:: +getIndexer() const -> const IndexerType& +{ + return this->indexer; +} + +template< typename Real, + typename Device, + typename Index, + bool RowMajorOrder > +__cuda_callable__ +auto +MultidiagonalMatrixView< Real, Device, Index, RowMajorOrder >:: +getIndexer() -> IndexerType& +{ + return this->indexer; +} + +template< typename Real, + typename Device, + typename Index, + bool RowMajorOrder > +__cuda_callable__ 
+Index +MultidiagonalMatrixView< Real, Device, Index, RowMajorOrder >:: +getElementIndex( const IndexType row, const IndexType localIdx ) const +{ + return this->indexer.getGlobalIndex( row, localIdx ); +} + +template< typename Real, + typename Device, + typename Index, + bool RowMajorOrder > +__cuda_callable__ +Index +MultidiagonalMatrixView< Real, Device, Index, RowMajorOrder >:: +getPaddingIndex() const +{ + return -1; +} + + +} // namespace Matrices +} // namespace TNL diff --git a/src/TNL/Matrices/SparseMatrix.h b/src/TNL/Matrices/SparseMatrix.h new file mode 100644 index 0000000000000000000000000000000000000000..032767518cc275e707961af48e474f4210256ef1 --- /dev/null +++ b/src/TNL/Matrices/SparseMatrix.h @@ -0,0 +1,245 @@ +/*************************************************************************** + SparseMatrix.h - description + ------------------- + begin : Nov 29, 2019 + copyright : (C) 2019 by Tomas Oberhuber + email : tomas.oberhuber@fjfi.cvut.cz + ***************************************************************************/ + +/* See Copyright Notice in tnl/Copyright */ + +#pragma once + +#include <TNL/Matrices/Matrix.h> +#include <TNL/Matrices/MatrixType.h> +#include <TNL/Allocators/Default.h> +#include <TNL/Containers/Segments/CSR.h> +#include <TNL/Matrices/SparseMatrixRowView.h> +#include <TNL/Matrices/SparseMatrixView.h> +#include <TNL/Matrices/Dense.h> + +namespace TNL { +namespace Matrices { + +template< typename Real, + typename Device = Devices::Host, + typename Index = int, + typename MatrixType = GeneralMatrix, + template< typename Device_, typename Index_, typename IndexAllocator_ > class Segments = Containers::Segments::CSR, + typename RealAllocator = typename Allocators::Default< Device >::template Allocator< Real >, + typename IndexAllocator = typename Allocators::Default< Device >::template Allocator< Index > > +class SparseMatrix : public Matrix< Real, Device, Index, RealAllocator > +{ + public: + static constexpr bool isSymmetric() { return MatrixType::isSymmetric(); }; + static constexpr bool isBinary() { return MatrixType::isBinary(); }; + + static_assert( + ! isSymmetric() || + ! std::is_same< Device, Devices::Cuda >::value || + ( std::is_same< Real, float >::value || std::is_same< Real, double >::value || std::is_same< Real, int >::value || std::is_same< Real, long long int >::value ), + "Given Real type is not supported by atomic operations on GPU which are necessary for symmetric operations." 
); + + using RealType = Real; + template< typename Device_, typename Index_, typename IndexAllocator_ > + using SegmentsTemplate = Segments< Device_, Index_, IndexAllocator_ >; + using SegmentsType = Segments< Device, Index, IndexAllocator >; + template< typename Device_, typename Index_ > + using SegmentsViewTemplate = typename SegmentsType::template ViewTemplate< Device_, Index >; + using SegmentsViewType = typename SegmentsType::ViewType; + using SegmentViewType = typename SegmentsType::SegmentViewType; + using DeviceType = Device; + using IndexType = Index; + using RealAllocatorType = RealAllocator; + using IndexAllocatorType = IndexAllocator; + using BaseType = Matrix< Real, Device, Index, RealAllocator >; + using RowsCapacitiesType = Containers::Vector< IndexType, DeviceType, IndexType, IndexAllocatorType >; + using RowsCapacitiesView = Containers::VectorView< IndexType, DeviceType, IndexType >; + using ConstRowsCapacitiesView = typename RowsCapacitiesView::ConstViewType; + using ValuesVectorType = typename Matrix< Real, Device, Index, RealAllocator >::ValuesVectorType; + using ValuesViewType = typename ValuesVectorType::ViewType; + using ColumnsIndexesVectorType = Containers::Vector< IndexType, DeviceType, IndexType, IndexAllocatorType >; + using ColumnsIndexesViewType = typename ColumnsIndexesVectorType::ViewType; + using ViewType = SparseMatrixView< Real, Device, Index, MatrixType, SegmentsViewTemplate >; + using ConstViewType = SparseMatrixView< typename std::add_const< Real >::type, Device, Index, MatrixType, SegmentsViewTemplate >; + using RowView = SparseMatrixRowView< SegmentViewType, ValuesViewType, ColumnsIndexesViewType, isBinary() >; + + // TODO: remove this - it is here only for compatibility with original matrix implementation + typedef Containers::Vector< IndexType, DeviceType, IndexType > CompressedRowLengthsVector; + typedef Containers::VectorView< IndexType, DeviceType, IndexType > CompressedRowLengthsVectorView; + typedef typename CompressedRowLengthsVectorView::ConstViewType ConstCompressedRowLengthsVectorView; + + SparseMatrix( const RealAllocatorType& realAllocator = RealAllocatorType(), + const IndexAllocatorType& indexAllocator = IndexAllocatorType() ); + + SparseMatrix( const SparseMatrix& m ); + + SparseMatrix( const SparseMatrix&& m ); + + SparseMatrix( const IndexType rows, + const IndexType columns, + const RealAllocatorType& realAllocator = RealAllocatorType(), + const IndexAllocatorType& indexAllocator = IndexAllocatorType() ); + + SparseMatrix( const std::initializer_list< IndexType >& rowCapacities, + const IndexType columns, + const RealAllocatorType& realAllocator = RealAllocatorType(), + const IndexAllocatorType& indexAllocator = IndexAllocatorType() ); + + SparseMatrix( const IndexType rows, + const IndexType columns, + const std::initializer_list< std::tuple< IndexType, IndexType, RealType > >& data, + const RealAllocatorType& realAllocator = RealAllocatorType(), + const IndexAllocatorType& indexAllocator = IndexAllocatorType() ); + + ViewType getView() const; // TODO: remove const + + ConstViewType getConstView() const; + + static String getSerializationType(); + + virtual String getSerializationTypeVirtual() const; + + template< typename RowsCapacitiesVector > + void setCompressedRowLengths( const RowsCapacitiesVector& rowCapacities ); + + // TODO: Remove this when possible + void setCompressedRowLengths( ConstCompressedRowLengthsVectorView rowLengths ) { + this->setCompressedRowLengths( rowLengths ); + }; + + void setElements( const 
std::initializer_list< std::tuple< IndexType, IndexType, RealType > >& data ); + + template< typename Vector > + void getCompressedRowLengths( Vector& rowLengths ) const; + + [[deprecated]] + virtual IndexType getRowLength( const IndexType row ) const { return 0;}; + + template< typename Matrix > + void setLike( const Matrix& matrix ); + + IndexType getNumberOfNonzeroMatrixElements() const; + + void reset(); + + __cuda_callable__ + const RowView getRow( const IndexType& rowIdx ) const; + + __cuda_callable__ + RowView getRow( const IndexType& rowIdx ); + + void setElement( const IndexType row, + const IndexType column, + const RealType& value ); + + void addElement( const IndexType row, + const IndexType column, + const RealType& value, + const RealType& thisElementMultiplicator ); + + RealType getElement( const IndexType row, + const IndexType column ) const; + + template< typename Vector > + __cuda_callable__ + typename Vector::RealType rowVectorProduct( const IndexType row, + const Vector& vector ) const; + + /*** + * \brief This method computes outVector = matrixMultiplicator * ( *this ) * inVector + outVectorMultiplicator * outVector + */ + template< typename InVector, + typename OutVector > + void vectorProduct( const InVector& inVector, + OutVector& outVector, + const RealType& matrixMultiplicator = 1.0, + const RealType& outVectorMultiplicator = 0.0 ) const; + + /*template< typename Real2, typename Index2 > + void addMatrix( const SparseMatrix< Real2, Segments, Device, Index2 >& matrix, + const RealType& matrixMultiplicator = 1.0, + const RealType& thisMatrixMultiplicator = 1.0 ); + + template< typename Real2, typename Index2 > + void getTransposition( const SparseMatrix< Real2, Segments, Device, Index2 >& matrix, + const RealType& matrixMultiplicator = 1.0 ); + */ + + template< typename Fetch, typename Reduce, typename Keep, typename FetchReal > + void rowsReduction( IndexType first, IndexType last, Fetch& fetch, Reduce& reduce, Keep& keep, const FetchReal& zero ) const; + + template< typename Fetch, typename Reduce, typename Keep, typename FetchReal > + void allRowsReduction( Fetch& fetch, Reduce& reduce, Keep& keep, const FetchReal& zero ) const; + + template< typename Function > + void forRows( IndexType first, IndexType last, Function& function ) const; + + template< typename Function > + void forRows( IndexType first, IndexType last, Function& function ); + + template< typename Function > + void forAllRows( Function& function ) const; + + template< typename Function > + void forAllRows( Function& function ); + + template< typename Vector1, typename Vector2 > + bool performSORIteration( const Vector1& b, + const IndexType row, + Vector2& x, + const RealType& omega = 1.0 ) const; + + /** + * \brief Assignment of exactly the same matrix type. + * @param matrix + * @return + */ + SparseMatrix& operator=( const SparseMatrix& matrix ); + + /** + * \brief Assignment of dense matrix + */ + template< typename Real_, typename Device_, typename Index_, bool RowMajorOrder, typename RealAllocator_ > + SparseMatrix& operator=( const Dense< Real_, Device_, Index_, RowMajorOrder, RealAllocator_ >& matrix ); + + + /** + * \brief Assignment of any other matrix type.
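+ * The right-hand side matrix may differ in the element type, device and storage format; the elements are presumably converted and copied into this matrix element by element.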
 + * @param matrix + * @return + */ + template< typename RHSMatrix > + SparseMatrix& operator=( const RHSMatrix& matrix ); + + void save( File& file ) const; + + void load( File& file ); + + void save( const String& fileName ) const; + + void load( const String& fileName ); + + void print( std::ostream& str ) const; + + __cuda_callable__ + IndexType getPaddingIndex() const; + +// TODO: restore it and also in Matrix +// protected: + + ColumnsIndexesVectorType columnIndexes; + + SegmentsType segments; + + IndexAllocator indexAllocator; + + //RealAllocator realAllocator; + + ViewType view; +}; + +} // namespace Matrices +} // namespace TNL + +#include <TNL/Matrices/SparseMatrix.hpp> diff --git a/src/TNL/Matrices/SparseMatrix.hpp b/src/TNL/Matrices/SparseMatrix.hpp new file mode 100644 index 0000000000000000000000000000000000000000..4c1f3b1ce41d27adf2a804e7171ec21f89ba7313 --- /dev/null +++ b/src/TNL/Matrices/SparseMatrix.hpp @@ -0,0 +1,955 @@ +/*************************************************************************** + SparseMatrix.hpp - description + ------------------- + begin : Nov 29, 2019 + copyright : (C) 2019 by Tomas Oberhuber + email : tomas.oberhuber@fjfi.cvut.cz + ***************************************************************************/ + +/* See Copyright Notice in tnl/Copyright */ + +#pragma once + +#include <functional> +#include <sstream> +#include <TNL/Algorithms/Reduction.h> +#include <TNL/Matrices/SparseMatrix.h> + +namespace TNL { +namespace Matrices { + +template< typename Real, + typename Device, + typename Index, + typename MatrixType, + template< typename, typename, typename > class Segments, + typename RealAllocator, + typename IndexAllocator > +SparseMatrix< Real, Device, Index, MatrixType, Segments, RealAllocator, IndexAllocator >:: +SparseMatrix( const RealAllocatorType& realAllocator, + const IndexAllocatorType& indexAllocator ) + : BaseType( realAllocator ), columnIndexes( indexAllocator ) +{ +} + +template< typename Real, + typename Device, + typename Index, + typename MatrixType, + template< typename, typename, typename > class Segments, + typename RealAllocator, + typename IndexAllocator > +SparseMatrix< Real, Device, Index, MatrixType, Segments, RealAllocator, IndexAllocator >:: +SparseMatrix( const SparseMatrix& m ) + : Matrix< Real, Device, Index, RealAllocator >( m ), columnIndexes( m.columnIndexes ) +{ +} + +template< typename Real, + typename Device, + typename Index, + typename MatrixType, + template< typename, typename, typename > class Segments, + typename RealAllocator, + typename IndexAllocator > +SparseMatrix< Real, Device, Index, MatrixType, Segments, RealAllocator, IndexAllocator >:: +SparseMatrix( const SparseMatrix&& m ) + : Matrix< Real, Device, Index, RealAllocator >( std::move( m ) ), columnIndexes( std::move( m.columnIndexes ) ) +{ +} + +template< typename Real, + typename Device, + typename Index, + typename MatrixType, + template< typename, typename, typename > class Segments, + typename RealAllocator, + typename IndexAllocator > +SparseMatrix< Real, Device, Index, MatrixType, Segments, RealAllocator, IndexAllocator >:: +SparseMatrix( const IndexType rows, + const IndexType columns, + const RealAllocatorType& realAllocator, + const IndexAllocatorType& indexAllocator ) +: BaseType( rows, columns, realAllocator ), columnIndexes( indexAllocator ) +{ +} + +template< typename Real, + typename Device, + typename Index, + typename MatrixType, + template< typename, typename, typename > class Segments, + typename RealAllocator, + typename
IndexAllocator > +SparseMatrix< Real, Device, Index, MatrixType, Segments, RealAllocator, IndexAllocator >:: +SparseMatrix( const std::initializer_list< IndexType >& rowCapacities, + const IndexType columns, + const RealAllocatorType& realAllocator, + const IndexAllocatorType& indexAllocator ) +: BaseType( rowCapacities.size(), columns, realAllocator ), columnIndexes( indexAllocator ) +{ + this->setCompressedRowLengths( RowsCapacitiesType( rowCapacities ) ); +} + +template< typename Real, + typename Device, + typename Index, + typename MatrixType, + template< typename, typename, typename > class Segments, + typename RealAllocator, + typename IndexAllocator > +SparseMatrix< Real, Device, Index, MatrixType, Segments, RealAllocator, IndexAllocator >:: +SparseMatrix( const IndexType rows, + const IndexType columns, + const std::initializer_list< std::tuple< IndexType, IndexType, RealType > >& data, + const RealAllocatorType& realAllocator, + const IndexAllocatorType& indexAllocator ) +: BaseType( rows, columns, realAllocator ), columnIndexes( indexAllocator ) +{ + this->setElements( data ); +} + +template< typename Real, + typename Device, + typename Index, + typename MatrixType, + template< typename, typename, typename > class Segments, + typename RealAllocator, + typename IndexAllocator > +auto +SparseMatrix< Real, Device, Index, MatrixType, Segments, RealAllocator, IndexAllocator >:: +getView() const -> ViewType +{ + return ViewType( this->getRows(), + this->getColumns(), + const_cast< SparseMatrix* >( this )->getValues().getView(), // TODO: remove const_cast + const_cast< SparseMatrix* >( this )->columnIndexes.getView(), + const_cast< SparseMatrix* >( this )->segments.getView() ); +} + +template< typename Real, + typename Device, + typename Index, + typename MatrixType, + template< typename, typename, typename > class Segments, + typename RealAllocator, + typename IndexAllocator > +auto +SparseMatrix< Real, Device, Index, MatrixType, Segments, RealAllocator, IndexAllocator >:: +getConstView() const -> ConstViewType +{ + return ConstViewType( this->getRows(), + this->getColumns(), + this->getValues().getConstView(), + this->columnIndexes.getConstView(), + this->segments.getConstView() ); +} + +template< typename Real, + typename Device, + typename Index, + typename MatrixType, + template< typename, typename, typename > class Segments, + typename RealAllocator, + typename IndexAllocator > +String +SparseMatrix< Real, Device, Index, MatrixType, Segments, RealAllocator, IndexAllocator >:: +getSerializationType() +{ + return String( "Matrices::SparseMatrix< " ) + + TNL::getSerializationType< RealType >() + ", " + + TNL::getSerializationType< SegmentsType >() + ", [any_device], " + + TNL::getSerializationType< IndexType >() + ", [any_allocator] >"; +} + +template< typename Real, + typename Device, + typename Index, + typename MatrixType, + template< typename, typename, typename > class Segments, + typename RealAllocator, + typename IndexAllocator > +String +SparseMatrix< Real, Device, Index, MatrixType, Segments, RealAllocator, IndexAllocator >:: +getSerializationTypeVirtual() const +{ + return this->getSerializationType(); +} + +template< typename Real, + typename Device, + typename Index, + typename MatrixType, + template< typename, typename, typename > class Segments, + typename RealAllocator, + typename IndexAllocator > + template< typename RowsCapacitiesVector > +void +SparseMatrix< Real, Device, Index, MatrixType, Segments, RealAllocator, IndexAllocator >:: +setCompressedRowLengths( const 
RowsCapacitiesVector& rowsCapacities ) +{ + TNL_ASSERT_EQ( rowsCapacities.getSize(), this->getRows(), "Number of matrix rows does not fit with rowLengths vector size." ); + using RowsCapacitiesVectorDevice = typename RowsCapacitiesVector::DeviceType; + if( std::is_same< DeviceType, RowsCapacitiesVectorDevice >::value ) + this->segments.setSegmentsSizes( rowsCapacities ); + else + { + RowsCapacitiesType thisRowsCapacities; + thisRowsCapacities = rowsCapacities; + this->segments.setSegmentsSizes( thisRowsCapacities ); + } + if( ! isBinary() ) + { + this->values.setSize( this->segments.getStorageSize() ); + this->values = ( RealType ) 0; + } + this->columnIndexes.setSize( this->segments.getStorageSize() ); + this->columnIndexes = this->getPaddingIndex(); + this->view = this->getView(); +} + +template< typename Real, + typename Device, + typename Index, + typename MatrixType, + template< typename, typename, typename > class Segments, + typename RealAllocator, + typename IndexAllocator > +void +SparseMatrix< Real, Device, Index, MatrixType, Segments, RealAllocator, IndexAllocator >:: +setElements( const std::initializer_list< std::tuple< IndexType, IndexType, RealType > >& data ) +{ + const auto& rows = this->getRows(); + const auto& columns = this->getColumns(); + Containers::Vector< IndexType, Devices::Host, IndexType > rowCapacities( rows, 0 ); + for( const auto& i : data ) + { + if( std::get< 0 >( i ) >= rows ) + { + std::stringstream s; + s << "Wrong row index " << std::get< 0 >( i ) << " in an initializer list"; + throw std::logic_error( s.str() ); + } + rowCapacities[ std::get< 0 >( i ) ]++; + } + SparseMatrix< Real, Devices::Host, Index, MatrixType, Segments > hostMatrix( rows, columns ); + hostMatrix.setCompressedRowLengths( rowCapacities ); + for( const auto& i : data ) + { + if( std::get< 1 >( i ) >= columns ) + { + std::stringstream s; + s << "Wrong column index " << std::get< 1 >( i ) << " in an initializer list"; + throw std::logic_error( s.str() ); + } + hostMatrix.setElement( std::get< 0 >( i ), std::get< 1 >( i ), std::get< 2 >( i ) ); + } + ( *this ) = hostMatrix; +} + +template< typename Real, + typename Device, + typename Index, + typename MatrixType, + template< typename, typename, typename > class Segments, + typename RealAllocator, + typename IndexAllocator > + template< typename Vector > +void +SparseMatrix< Real, Device, Index, MatrixType, Segments, RealAllocator, IndexAllocator >:: +getCompressedRowLengths( Vector& rowLengths ) const +{ + this->view.getCompressedRowLengths( rowLengths ); +} + +template< typename Real, + typename Device, + typename Index, + typename MatrixType, + template< typename, typename, typename > class Segments, + typename RealAllocator, + typename IndexAllocator > + template< typename Matrix_ > +void +SparseMatrix< Real, Device, Index, MatrixType, Segments, RealAllocator, IndexAllocator >:: +setLike( const Matrix_& matrix ) +{ + BaseType::setLike( matrix ); +} + +template< typename Real, + typename Device, + typename Index, + typename MatrixType, + template< typename, typename, typename > class Segments, + typename RealAllocator, + typename IndexAllocator > +Index +SparseMatrix< Real, Device, Index, MatrixType, Segments, RealAllocator, IndexAllocator >:: +getNumberOfNonzeroMatrixElements() const +{ + return this->view.getNumberOfNonzeroMatrixElements(); +} + +template< typename Real, + typename Device, + typename Index, + typename MatrixType, + template< typename, typename, typename > class Segments, + typename RealAllocator, + typename 
IndexAllocator > +void +SparseMatrix< Real, Device, Index, MatrixType, Segments, RealAllocator, IndexAllocator >:: +reset() +{ + BaseType::reset(); +} + +template< typename Real, + typename Device, + typename Index, + typename MatrixType, + template< typename, typename, typename > class Segments, + typename RealAllocator, + typename IndexAllocator > +__cuda_callable__ auto +SparseMatrix< Real, Device, Index, MatrixType, Segments, RealAllocator, IndexAllocator >:: +getRow( const IndexType& rowIdx ) const -> const RowView +{ + return this->view.getRow( rowIdx ); +} + +template< typename Real, + typename Device, + typename Index, + typename MatrixType, + template< typename, typename, typename > class Segments, + typename RealAllocator, + typename IndexAllocator > +__cuda_callable__ auto +SparseMatrix< Real, Device, Index, MatrixType, Segments, RealAllocator, IndexAllocator >:: +getRow( const IndexType& rowIdx ) -> RowView +{ + return this->view.getRow( rowIdx ); +} + +template< typename Real, + typename Device, + typename Index, + typename MatrixType, + template< typename, typename, typename > class Segments, + typename RealAllocator, + typename IndexAllocator > +void +SparseMatrix< Real, Device, Index, MatrixType, Segments, RealAllocator, IndexAllocator >:: +setElement( const IndexType row, + const IndexType column, + const RealType& value ) +{ + this->view.setElement( row, column, value ); +} + +template< typename Real, + typename Device, + typename Index, + typename MatrixType, + template< typename, typename, typename > class Segments, + typename RealAllocator, + typename IndexAllocator > +void +SparseMatrix< Real, Device, Index, MatrixType, Segments, RealAllocator, IndexAllocator >:: +addElement( const IndexType row, + const IndexType column, + const RealType& value, + const RealType& thisElementMultiplicator ) +{ + this->view.addElement( row, column, value, thisElementMultiplicator ); +} + +template< typename Real, + typename Device, + typename Index, + typename MatrixType, + template< typename, typename, typename > class Segments, + typename RealAllocator, + typename IndexAllocator > +Real +SparseMatrix< Real, Device, Index, MatrixType, Segments, RealAllocator, IndexAllocator >:: +getElement( const IndexType row, + const IndexType column ) const +{ + return this->view.getElement( row, column ); +} + +template< typename Real, + typename Device, + typename Index, + typename MatrixType, + template< typename, typename, typename > class Segments, + typename RealAllocator, + typename IndexAllocator > + template< typename Vector > +__cuda_callable__ +typename Vector::RealType +SparseMatrix< Real, Device, Index, MatrixType, Segments, RealAllocator, IndexAllocator >:: +rowVectorProduct( const IndexType row, + const Vector& vector ) const +{ + this->view.rowVectorProduct( row, vector ); +} + +template< typename Real, + typename Device, + typename Index, + typename MatrixType, + template< typename, typename, typename > class Segments, + typename RealAllocator, + typename IndexAllocator > +template< typename InVector, + typename OutVector > +void +SparseMatrix< Real, Device, Index, MatrixType, Segments, RealAllocator, IndexAllocator >:: +vectorProduct( const InVector& inVector, + OutVector& outVector, + const RealType& matrixMultiplicator, + const RealType& outVectorMultiplicator ) const +{ + this->view.vectorProduct( inVector, outVector, matrixMultiplicator, outVectorMultiplicator ); + /*TNL_ASSERT_EQ( this->getColumns(), inVector.getSize(), "Matrix columns do not fit with input vector." 
); + TNL_ASSERT_EQ( this->getRows(), outVector.getSize(), "Matrix rows do not fit with output vector." ); + + const auto inVectorView = inVector.getConstView(); + auto outVectorView = outVector.getView(); + const auto valuesView = this->values.getConstView(); + const auto columnIndexesView = this->columnIndexes.getConstView(); + const IndexType paddingIndex = this->getPaddingIndex(); + auto fetch = [=] __cuda_callable__ ( IndexType row, IndexType localIdx, IndexType globalIdx, bool& compute ) -> RealType { + const IndexType column = columnIndexesView[ globalIdx ]; + compute = ( column != paddingIndex ); + if( ! compute ) + return 0.0; + return valuesView[ globalIdx ] * inVectorView[ column ]; + }; + auto reduction = [] __cuda_callable__ ( RealType& sum, const RealType& value ) { + sum += value; + }; + auto keeper = [=] __cuda_callable__ ( IndexType row, const RealType& value ) mutable { + outVectorView[ row ] = value; + }; + this->segments.segmentsReduction( 0, this->getRows(), fetch, reduction, keeper, ( RealType ) 0.0 );*/ +} + +template< typename Real, + typename Device, + typename Index, + typename MatrixType, + template< typename, typename, typename > class Segments, + typename RealAllocator, + typename IndexAllocator > + template< typename Fetch, typename Reduce, typename Keep, typename FetchValue > +void +SparseMatrix< Real, Device, Index, MatrixType, Segments, RealAllocator, IndexAllocator >:: +rowsReduction( IndexType first, IndexType last, Fetch& fetch, Reduce& reduce, Keep& keep, const FetchValue& zero ) const +{ + this->view.rowsReduction( first, last, fetch, reduce, keep, zero ); + /*const auto columns_view = this->columnIndexes.getConstView(); + const auto values_view = this->values.getConstView(); + const IndexType paddingIndex_ = this->getPaddingIndex(); + auto fetch_ = [=] __cuda_callable__ ( IndexType rowIdx, IndexType localIdx, IndexType globalIdx, bool& compute ) mutable -> decltype( fetch( IndexType(), IndexType(), IndexType(), RealType() ) ) { + IndexType columnIdx = columns_view[ globalIdx ]; + if( columnIdx != paddingIndex_ ) + return fetch( rowIdx, columnIdx, globalIdx, values_view[ globalIdx ] ); + return zero; + }; + this->segments.segmentsReduction( first, last, fetch_, reduce, keep, zero );*/ +} + +template< typename Real, + typename Device, + typename Index, + typename MatrixType, + template< typename, typename, typename > class Segments, + typename RealAllocator, + typename IndexAllocator > + template< typename Fetch, typename Reduce, typename Keep, typename FetchReal > +void +SparseMatrix< Real, Device, Index, MatrixType, Segments, RealAllocator, IndexAllocator >:: +allRowsReduction( Fetch& fetch, Reduce& reduce, Keep& keep, const FetchReal& zero ) const +{ + this->rowsReduction( 0, this->getRows(), fetch, reduce, keep, zero ); +} + +template< typename Real, + typename Device, + typename Index, + typename MatrixType, + template< typename, typename, typename > class Segments, + typename RealAllocator, + typename IndexAllocator > + template< typename Function > +void +SparseMatrix< Real, Device, Index, MatrixType, Segments, RealAllocator, IndexAllocator >:: +forRows( IndexType first, IndexType last, Function& function ) const +{ + this->view.forRows( first, last, function ); + /*const auto columns_view = this->columnIndexes.getConstView(); + const auto values_view = this->values.getConstView(); + const IndexType paddingIndex_ = this->getPaddingIndex(); + auto f = [=] __cuda_callable__ ( IndexType rowIdx, IndexType localIdx, IndexType globalIdx ) mutable -> 
bool { + function( rowIdx, localIdx, columns_view[ globalIdx ], values_view[ globalIdx ] ); + return true; + }; + this->segments.forSegments( first, last, f ); + */ +} + +template< typename Real, + typename Device, + typename Index, + typename MatrixType, + template< typename, typename, typename > class Segments, + typename RealAllocator, + typename IndexAllocator > + template< typename Function > +void +SparseMatrix< Real, Device, Index, MatrixType, Segments, RealAllocator, IndexAllocator >:: +forRows( IndexType first, IndexType last, Function& function ) +{ + this->view.forRows( first, last, function ); + /*auto columns_view = this->columnIndexes.getView(); + auto values_view = this->values.getView(); + const IndexType paddingIndex_ = this->getPaddingIndex(); + auto f = [=] __cuda_callable__ ( IndexType rowIdx, IndexType localIdx, IndexType globalIdx ) mutable -> bool { + function( rowIdx, localIdx, columns_view[ globalIdx ], values_view[ globalIdx ] ); + return true; + }; + this->segments.forSegments( first, last, f );*/ +} + +template< typename Real, + typename Device, + typename Index, + typename MatrixType, + template< typename, typename, typename > class Segments, + typename RealAllocator, + typename IndexAllocator > + template< typename Function > +void +SparseMatrix< Real, Device, Index, MatrixType, Segments, RealAllocator, IndexAllocator >:: +forAllRows( Function& function ) const +{ + this->forRows( 0, this->getRows(), function ); +} + +template< typename Real, + typename Device, + typename Index, + typename MatrixType, + template< typename, typename, typename > class Segments, + typename RealAllocator, + typename IndexAllocator > + template< typename Function > +void +SparseMatrix< Real, Device, Index, MatrixType, Segments, RealAllocator, IndexAllocator >:: +forAllRows( Function& function ) +{ + this->forRows( 0, this->getRows(), function ); +} + +/*template< typename Real, + template< typename, typename, typename > class Segments, + typename Device, + typename Index, + typename RealAllocator, + typename IndexAllocator > +template< typename Real2, template< typename, typename > class Segments2, typename Index2, typename RealAllocator2, typename IndexAllocator2 > +void +SparseMatrix< Real, Device, Index, MatrixType, Segments, RealAllocator, IndexAllocator >:: +addMatrix( const SparseMatrix< Real2, Segments2, Device, Index2, RealAllocator2, IndexAllocator2 >& matrix, + const RealType& matrixMultiplicator, + const RealType& thisMatrixMultiplicator ) +{ + +} + +template< typename Real, + template< typename, typename, typename > class Segments, + typename Device, + typename Index, + typename RealAllocator, + typename IndexAllocator > +template< typename Real2, typename Index2 > +void +SparseMatrix< Real, Device, Index, MatrixType, Segments, RealAllocator, IndexAllocator >:: +getTransposition( const SparseMatrix< Real2, Device, Index2 >& matrix, + const RealType& matrixMultiplicator ) +{ + +}*/ + +template< typename Real, + typename Device, + typename Index, + typename MatrixType, + template< typename, typename, typename > class Segments, + typename RealAllocator, + typename IndexAllocator > +template< typename Vector1, typename Vector2 > +bool +SparseMatrix< Real, Device, Index, MatrixType, Segments, RealAllocator, IndexAllocator >:: +performSORIteration( const Vector1& b, + const IndexType row, + Vector2& x, + const RealType& omega ) const +{ + return false; +} + +// copy assignment +template< typename Real, + typename Device, + typename Index, + typename MatrixType, + template< 
typename, typename, typename > class Segments, + typename RealAllocator, + typename IndexAllocator > +SparseMatrix< Real, Device, Index, MatrixType, Segments, RealAllocator, IndexAllocator >& +SparseMatrix< Real, Device, Index, MatrixType, Segments, RealAllocator, IndexAllocator >:: +operator=( const SparseMatrix& matrix ) +{ + Matrix< Real, Device, Index >::operator=( matrix ); + this->columnIndexes = matrix.columnIndexes; + this->segments = matrix.segments; + this->view = this->getView(); + return *this; +} + +template< typename Real, + typename Device, + typename Index, + typename MatrixType, + template< typename, typename, typename > class Segments, + typename RealAllocator, + typename IndexAllocator > + template< typename Real_, typename Device_, typename Index_, bool RowMajorOrder, typename RealAllocator_ > +SparseMatrix< Real, Device, Index, MatrixType, Segments, RealAllocator, IndexAllocator >& +SparseMatrix< Real, Device, Index, MatrixType, Segments, RealAllocator, IndexAllocator >:: +operator=( const Dense< Real_, Device_, Index_, RowMajorOrder, RealAllocator_ >& matrix ) +{ + using RHSMatrix = Dense< Real_, Device_, Index_, RowMajorOrder, RealAllocator_ >; + using RHSIndexType = typename RHSMatrix::IndexType; + using RHSRealType = typename RHSMatrix::RealType; + using RHSDeviceType = typename RHSMatrix::DeviceType; + using RHSRealAllocatorType = typename RHSMatrix::RealAllocatorType; + + Containers::Vector< RHSIndexType, RHSDeviceType, RHSIndexType > rowLengths; + matrix.getCompressedRowLengths( rowLengths ); + this->setLike( matrix ); + this->setCompressedRowLengths( rowLengths ); + Containers::Vector< IndexType, DeviceType, IndexType > rowLocalIndexes( matrix.getRows() ); + rowLocalIndexes = 0; + + // TODO: use getConstView when it works + const auto matrixView = const_cast< RHSMatrix& >( matrix ).getView(); + const IndexType paddingIndex = this->getPaddingIndex(); + auto columns_view = this->columnIndexes.getView(); + auto values_view = this->values.getView(); + auto rowLocalIndexes_view = rowLocalIndexes.getView(); + columns_view = paddingIndex; + + if( std::is_same< DeviceType, RHSDeviceType >::value ) + { + const auto segments_view = this->segments.getView(); + auto f = [=] __cuda_callable__ ( RHSIndexType rowIdx, RHSIndexType localIdx, RHSIndexType columnIdx, const RHSRealType& value, bool& compute ) mutable { + if( value != 0.0 ) + { + IndexType thisGlobalIdx = segments_view.getGlobalIndex( rowIdx, rowLocalIndexes_view[ rowIdx ]++ ); + columns_view[ thisGlobalIdx ] = columnIdx; + if( ! 
isBinary() ) + values_view[ thisGlobalIdx ] = value; + } + }; + matrix.forAllRows( f ); + } + else + { + const IndexType maxRowLength = matrix.getColumns(); + const IndexType bufferRowsCount( 128 ); + const size_t bufferSize = bufferRowsCount * maxRowLength; + Containers::Vector< RHSRealType, RHSDeviceType, RHSIndexType, RHSRealAllocatorType > matrixValuesBuffer( bufferSize ); + Containers::Vector< RealType, DeviceType, IndexType, RealAllocatorType > thisValuesBuffer( bufferSize ); + Containers::Vector< IndexType, DeviceType, IndexType, IndexAllocatorType > thisColumnsBuffer( bufferSize ); + auto matrixValuesBuffer_view = matrixValuesBuffer.getView(); + auto thisValuesBuffer_view = thisValuesBuffer.getView(); + + IndexType baseRow( 0 ); + const IndexType rowsCount = this->getRows(); + while( baseRow < rowsCount ) + { + const IndexType lastRow = min( baseRow + bufferRowsCount, rowsCount ); + thisColumnsBuffer = paddingIndex; + + //// + // Copy matrix elements into buffer + auto f1 = [=] __cuda_callable__ ( RHSIndexType rowIdx, RHSIndexType localIdx, RHSIndexType columnIndex, const RHSRealType& value, bool& compute ) mutable { + const IndexType bufferIdx = ( rowIdx - baseRow ) * maxRowLength + localIdx; + matrixValuesBuffer_view[ bufferIdx ] = value; + }; + matrix.forRows( baseRow, lastRow, f1 ); + + //// + // Copy the source matrix buffer to this matrix buffer + thisValuesBuffer_view = matrixValuesBuffer_view; + + //// + // Copy matrix elements from the buffer to the matrix and ignoring + // zero matrix elements. + const IndexType matrix_columns = this->getColumns(); + auto f2 = [=] __cuda_callable__ ( IndexType rowIdx, IndexType localIdx, IndexType& columnIndex, RealType& value, bool& compute ) mutable { + RealType inValue( 0.0 ); + IndexType bufferIdx, column( rowLocalIndexes_view[ rowIdx ] ); + while( inValue == 0.0 && column < matrix_columns ) + { + bufferIdx = ( rowIdx - baseRow ) * maxRowLength + column++; + inValue = thisValuesBuffer_view[ bufferIdx ]; + } + rowLocalIndexes_view[ rowIdx ] = column; + if( inValue == 0.0 ) + { + columnIndex = paddingIndex; + value = 0.0; + } + else + { + columnIndex = column - 1; + value = inValue; + } + }; + this->forRows( baseRow, lastRow, f2 ); + baseRow += bufferRowsCount; + } + //std::cerr << "This matrix = " << std::endl << *this << std::endl; + } + this->view = this->getView(); + return *this; + +} + +template< typename Real, + typename Device, + typename Index, + typename MatrixType, + template< typename, typename, typename > class Segments, + typename RealAllocator, + typename IndexAllocator > + template< typename RHSMatrix > +SparseMatrix< Real, Device, Index, MatrixType, Segments, RealAllocator, IndexAllocator >& +SparseMatrix< Real, Device, Index, MatrixType, Segments, RealAllocator, IndexAllocator >:: +operator=( const RHSMatrix& matrix ) +{ + using RHSIndexType = typename RHSMatrix::IndexType; + using RHSRealType = typename RHSMatrix::RealType; + using RHSDeviceType = typename RHSMatrix::DeviceType; + using RHSRealAllocatorType = typename RHSMatrix::RealAllocatorType; + + Containers::Vector< RHSIndexType, RHSDeviceType, RHSIndexType > rowLengths; + matrix.getCompressedRowLengths( rowLengths ); + this->setDimensions( matrix.getRows(), matrix.getColumns() ); + this->setCompressedRowLengths( rowLengths ); + Containers::Vector< IndexType, DeviceType, IndexType > rowLocalIndexes( matrix.getRows() ); + rowLocalIndexes = 0; + + // TODO: use getConstView when it works + const auto matrixView = const_cast< RHSMatrix& >( matrix ).getView(); + const 
IndexType paddingIndex = this->getPaddingIndex(); + auto columns_view = this->columnIndexes.getView(); + auto values_view = this->values.getView(); + auto rowLocalIndexes_view = rowLocalIndexes.getView(); + columns_view = paddingIndex; + + if( std::is_same< DeviceType, RHSDeviceType >::value ) + { + const auto segments_view = this->segments.getView(); + auto f = [=] __cuda_callable__ ( RHSIndexType rowIdx, RHSIndexType localIdx_, RHSIndexType columnIndex, const RHSRealType& value, bool& compute ) mutable { + IndexType localIdx( rowLocalIndexes_view[ rowIdx ] ); + if( value != 0.0 && columnIndex != paddingIndex ) + { + IndexType thisGlobalIdx = segments_view.getGlobalIndex( rowIdx, localIdx++ ); + columns_view[ thisGlobalIdx ] = columnIndex; + if( ! isBinary() ) + values_view[ thisGlobalIdx ] = value; + rowLocalIndexes_view[ rowIdx ] = localIdx; + } + }; + matrix.forAllRows( f ); + } + else + { + const IndexType maxRowLength = max( rowLengths ); + const IndexType bufferRowsCount( 128 ); + const size_t bufferSize = bufferRowsCount * maxRowLength; + Containers::Vector< RHSRealType, RHSDeviceType, RHSIndexType, RHSRealAllocatorType > matrixValuesBuffer( bufferSize ); + Containers::Vector< RHSIndexType, RHSDeviceType, RHSIndexType > matrixColumnsBuffer( bufferSize ); + Containers::Vector< RealType, DeviceType, IndexType, RealAllocatorType > thisValuesBuffer( bufferSize ); + Containers::Vector< IndexType, DeviceType, IndexType > thisColumnsBuffer( bufferSize ); + Containers::Vector< IndexType, DeviceType, IndexType > thisRowLengths; + thisRowLengths = rowLengths; + auto matrixValuesBuffer_view = matrixValuesBuffer.getView(); + auto matrixColumnsBuffer_view = matrixColumnsBuffer.getView(); + auto thisValuesBuffer_view = thisValuesBuffer.getView(); + auto thisColumnsBuffer_view = thisColumnsBuffer.getView(); + matrixValuesBuffer_view = 0.0; + + IndexType baseRow( 0 ); + const IndexType rowsCount = this->getRows(); + while( baseRow < rowsCount ) + { + const IndexType lastRow = min( baseRow + bufferRowsCount, rowsCount ); + thisColumnsBuffer = paddingIndex; + matrixColumnsBuffer_view = paddingIndex; + + //// + // Copy matrix elements into buffer + auto f1 = [=] __cuda_callable__ ( RHSIndexType rowIdx, RHSIndexType localIdx, RHSIndexType columnIndex, const RHSRealType& value, bool& compute ) mutable { + if( columnIndex != paddingIndex ) + { + const IndexType bufferIdx = ( rowIdx - baseRow ) * maxRowLength + localIdx; + matrixColumnsBuffer_view[ bufferIdx ] = columnIndex; + matrixValuesBuffer_view[ bufferIdx ] = value; + //std::cerr << " <<<<< rowIdx = " << rowIdx << " localIdx = " << localIdx << " value = " << value << " bufferIdx = " << bufferIdx << std::endl; + } + }; + matrix.forRows( baseRow, lastRow, f1 ); + + //// + // Copy the source matrix buffer to this matrix buffer + thisValuesBuffer_view = matrixValuesBuffer_view; + thisColumnsBuffer_view = matrixColumnsBuffer_view; + + //// + // Copy matrix elements from the buffer to the matrix and ignoring + // zero matrix elements + const IndexType matrix_columns = this->getColumns(); + const auto thisRowLengths_view = thisRowLengths.getConstView(); + auto f2 = [=] __cuda_callable__ ( IndexType rowIdx, IndexType localIdx, IndexType& columnIndex, RealType& value, bool& compute ) mutable { + RealType inValue( 0.0 ); + size_t bufferIdx; + IndexType bufferLocalIdx( rowLocalIndexes_view[ rowIdx ] ); + while( inValue == 0.0 && localIdx < thisRowLengths_view[ rowIdx ] ) + { + bufferIdx = ( rowIdx - baseRow ) * maxRowLength + bufferLocalIdx++; + 
TNL_ASSERT_LT( bufferIdx, bufferSize, "" ); + inValue = thisValuesBuffer_view[ bufferIdx ]; + } + //std::cerr << "rowIdx = " << rowIdx << " localIdx = " << localIdx << " bufferLocalIdx = " << bufferLocalIdx + // << " inValue = " << inValue << " bufferIdx = " << bufferIdx << std::endl; + rowLocalIndexes_view[ rowIdx ] = bufferLocalIdx; + if( inValue == 0.0 ) + { + columnIndex = paddingIndex; + value = 0.0; + } + else + { + columnIndex = thisColumnsBuffer_view[ bufferIdx ];//column - 1; + value = inValue; + } + }; + this->forRows( baseRow, lastRow, f2 ); + baseRow += bufferRowsCount; + } + } + this->view = this->getView(); + return *this; +} + +template< typename Real, + typename Device, + typename Index, + typename MatrixType, + template< typename, typename, typename > class Segments, + typename RealAllocator, + typename IndexAllocator > +void +SparseMatrix< Real, Device, Index, MatrixType, Segments, RealAllocator, IndexAllocator >:: +save( File& file ) const +{ + this->view.save( file ); +} + +template< typename Real, + typename Device, + typename Index, + typename MatrixType, + template< typename, typename, typename > class Segments, + typename RealAllocator, + typename IndexAllocator > +void +SparseMatrix< Real, Device, Index, MatrixType, Segments, RealAllocator, IndexAllocator >:: +load( File& file ) +{ + Matrix< RealType, DeviceType, IndexType >::load( file ); + file >> this->columnIndexes; + this->segments.load( file ); + this->view = this->getView(); +} + +template< typename Real, + typename Device, + typename Index, + typename MatrixType, + template< typename, typename, typename > class Segments, + typename RealAllocator, + typename IndexAllocator > +void +SparseMatrix< Real, Device, Index, MatrixType, Segments, RealAllocator, IndexAllocator >:: +save( const String& fileName ) const +{ + Object::save( fileName ); +} + +template< typename Real, + typename Device, + typename Index, + typename MatrixType, + template< typename, typename, typename > class Segments, + typename RealAllocator, + typename IndexAllocator > +void +SparseMatrix< Real, Device, Index, MatrixType, Segments, RealAllocator, IndexAllocator >:: +load( const String& fileName ) +{ + Object::load( fileName ); +} + +template< typename Real, + typename Device, + typename Index, + typename MatrixType, + template< typename, typename, typename > class Segments, + typename RealAllocator, + typename IndexAllocator > +void +SparseMatrix< Real, Device, Index, MatrixType, Segments, RealAllocator, IndexAllocator >:: +print( std::ostream& str ) const +{ + this->view.print( str ); +} + +template< typename Real, + typename Device, + typename Index, + typename MatrixType, + template< typename, typename, typename > class Segments, + typename RealAllocator, + typename IndexAllocator > +__cuda_callable__ +Index +SparseMatrix< Real, Device, Index, MatrixType, Segments, RealAllocator, IndexAllocator >:: +getPaddingIndex() const +{ + return -1; +} + + } //namespace Matrices +} // namespace TNL diff --git a/src/TNL/Matrices/SparseMatrixRowView.h b/src/TNL/Matrices/SparseMatrixRowView.h new file mode 100644 index 0000000000000000000000000000000000000000..8906ab5ae9fd1457ee6690597898a001bdab7c18 --- /dev/null +++ b/src/TNL/Matrices/SparseMatrixRowView.h @@ -0,0 +1,67 @@ + /*************************************************************************** + SparseMatrixRowView.h - description + ------------------- + begin : Dec 28, 2019 + copyright : (C) 2019 by Tomas Oberhuber + email : tomas.oberhuber@fjfi.cvut.cz + 
***************************************************************************/ + +/* See Copyright Notice in tnl/Copyright */ + +#pragma once + +namespace TNL { + namespace Matrices { + +template< typename SegmentView, + typename ValuesView, + typename ColumnsIndexesView, + bool isBinary_ > +class SparseMatrixRowView +{ + public: + + using RealType = typename ValuesView::RealType; + using SegmentViewType = SegmentView; + using IndexType = typename SegmentViewType::IndexType; + using ValuesViewType = ValuesView; + using ColumnsIndexesViewType = ColumnsIndexesView; + + static constexpr bool isBinary() { return isBinary_; }; + + __cuda_callable__ + SparseMatrixRowView( const SegmentViewType& segmentView, + const ValuesViewType& values, + const ColumnsIndexesViewType& columnIndexes ); + + __cuda_callable__ + IndexType getSize() const; + + __cuda_callable__ + const IndexType& getColumnIndex( const IndexType localIdx ) const; + + __cuda_callable__ + IndexType& getColumnIndex( const IndexType localIdx ); + + __cuda_callable__ + const RealType& getValue( const IndexType localIdx ) const; + + __cuda_callable__ + RealType& getValue( const IndexType localIdx ); + + __cuda_callable__ + void setElement( const IndexType localIdx, + const IndexType column, + const RealType& value ); + protected: + + SegmentViewType segmentView; + + ValuesViewType values; + + ColumnsIndexesViewType columnIndexes; +}; + } // namespace Matrices +} // namespace TNL + +#include <TNL/Matrices/SparseMatrixRowView.hpp> diff --git a/src/TNL/Matrices/SparseMatrixRowView.hpp b/src/TNL/Matrices/SparseMatrixRowView.hpp new file mode 100644 index 0000000000000000000000000000000000000000..67d0845d4af23dd57065b516b212a278fbd0fd5d --- /dev/null +++ b/src/TNL/Matrices/SparseMatrixRowView.hpp @@ -0,0 +1,111 @@ +/*************************************************************************** + SparseMatrixRowView.hpp - description + ------------------- + begin : Dec 28, 2019 + copyright : (C) 2019 by Tomas Oberhuber + email : tomas.oberhuber@fjfi.cvut.cz + ***************************************************************************/ + +/* See Copyright Notice in tnl/Copyright */ + +#pragma once + +#include <TNL/Matrices/SparseMatrixRowView.h> + +namespace TNL { + namespace Matrices { + +template< typename SegmentView, + typename ValuesView, + typename ColumnsIndexesView, + bool isBinary_ > +__cuda_callable__ +SparseMatrixRowView< SegmentView, ValuesView, ColumnsIndexesView, isBinary_ >:: +SparseMatrixRowView( const SegmentViewType& segmentView, + const ValuesViewType& values, + const ColumnsIndexesViewType& columnIndexes ) + : segmentView( segmentView ), values( values ), columnIndexes( columnIndexes ) +{ +} + +template< typename SegmentView, + typename ValuesView, + typename ColumnsIndexesView, + bool isBinary_ > +__cuda_callable__ auto +SparseMatrixRowView< SegmentView, ValuesView, ColumnsIndexesView, isBinary_ >:: +getSize() const -> IndexType +{ + return segmentView.getSize(); +} + +template< typename SegmentView, + typename ValuesView, + typename ColumnsIndexesView, + bool isBinary_ > +__cuda_callable__ auto +SparseMatrixRowView< SegmentView, ValuesView, ColumnsIndexesView, isBinary_ >:: +getColumnIndex( const IndexType localIdx ) const -> const IndexType& +{ + TNL_ASSERT_LT( localIdx, this->getSize(), "Local index exceeds matrix row capacity." 
); + return columnIndexes[ segmentView.getGlobalIndex( localIdx ) ]; +} + +template< typename SegmentView, + typename ValuesView, + typename ColumnsIndexesView, + bool isBinary_ > +__cuda_callable__ auto +SparseMatrixRowView< SegmentView, ValuesView, ColumnsIndexesView, isBinary_ >:: +getColumnIndex( const IndexType localIdx ) -> IndexType& +{ + TNL_ASSERT_LT( localIdx, this->getSize(), "Local index exceeds matrix row capacity." ); + return columnIndexes[ segmentView.getGlobalIndex( localIdx ) ]; +} + +template< typename SegmentView, + typename ValuesView, + typename ColumnsIndexesView, + bool isBinary_ > +__cuda_callable__ auto +SparseMatrixRowView< SegmentView, ValuesView, ColumnsIndexesView, isBinary_ >:: +getValue( const IndexType localIdx ) const -> const RealType& +{ + TNL_ASSERT_LT( localIdx, this->getSize(), "Local index exceeds matrix row capacity." ); + TNL_ASSERT_FALSE( isBinary(), "Cannot call this method for binary matrix row." ); + return values[ segmentView.getGlobalIndex( localIdx ) ]; +} + +template< typename SegmentView, + typename ValuesView, + typename ColumnsIndexesView, + bool isBinary_ > +__cuda_callable__ auto +SparseMatrixRowView< SegmentView, ValuesView, ColumnsIndexesView, isBinary_ >:: +getValue( const IndexType localIdx ) -> RealType& +{ + TNL_ASSERT_LT( localIdx, this->getSize(), "Local index exceeds matrix row capacity." ); + TNL_ASSERT_FALSE( isBinary(), "Cannot call this method for binary matrix row." ); + return values[ segmentView.getGlobalIndex( localIdx ) ]; +} + +template< typename SegmentView, + typename ValuesView, + typename ColumnsIndexesView, + bool isBinary_ > +__cuda_callable__ void +SparseMatrixRowView< SegmentView, ValuesView, ColumnsIndexesView, isBinary_ >:: +setElement( const IndexType localIdx, + const IndexType column, + const RealType& value ) +{ + TNL_ASSERT_LT( localIdx, this->getSize(), "Local index exceeds matrix row capacity." ); + const IndexType globalIdx = segmentView.getGlobalIndex( localIdx ); + columnIndexes[ globalIdx ] = column; + if( ! 
isBinary() ) + values[ globalIdx ] = value; +} + + + } // namespace Matrices +} // namespace TNL diff --git a/src/TNL/Matrices/SparseMatrixView.h b/src/TNL/Matrices/SparseMatrixView.h new file mode 100644 index 0000000000000000000000000000000000000000..4fa65b70a09e7834aacac1ac80d74ee08c9e4ece --- /dev/null +++ b/src/TNL/Matrices/SparseMatrixView.h @@ -0,0 +1,168 @@ +/*************************************************************************** + SparseMatrixView.h - description + ------------------- + begin : Dec 28, 2019 + copyright : (C) 2019 by Tomas Oberhuber + email : tomas.oberhuber@fjfi.cvut.cz + ***************************************************************************/ + +/* See Copyright Notice in tnl/Copyright */ + +#pragma once + +#include <TNL/Matrices/Matrix.h> +#include <TNL/Matrices/MatrixType.h> +#include <TNL/Allocators/Default.h> +#include <TNL/Containers/Segments/CSR.h> +#include <TNL/Matrices/SparseMatrixRowView.h> + +namespace TNL { +namespace Matrices { + +template< typename Real, + typename Device = Devices::Host, + typename Index = int, + typename MatrixType = GeneralMatrix, + template< typename Device_, typename Index_ > class SegmentsView = Containers::Segments::CSRView > +class SparseMatrixView : public MatrixView< Real, Device, Index > +{ + public: + static constexpr bool isSymmetric() { return MatrixType::isSymmetric(); }; + static constexpr bool isBinary() { return MatrixType::isBinary(); }; + + using RealType = Real; + template< typename Device_, typename Index_ > + using SegmentsViewTemplate = SegmentsView< Device_, Index_ >; + using SegmentsViewType = SegmentsView< Device, Index >; + using SegmentViewType = typename SegmentsViewType::SegmentViewType; + using DeviceType = Device; + using IndexType = Index; + using BaseType = MatrixView< Real, Device, Index >; + using RowsCapacitiesView = Containers::VectorView< IndexType, DeviceType, IndexType >; + using ConstRowsCapacitiesView = typename RowsCapacitiesView::ConstViewType; + using ValuesViewType = typename BaseType::ValuesView; + using ColumnsIndexesViewType = Containers::VectorView< IndexType, DeviceType, IndexType >; + using ViewType = SparseMatrixView< typename std::remove_const< Real >::type, Device, Index, MatrixType, SegmentsViewTemplate >; + using ConstViewType = SparseMatrixView< typename std::add_const< Real >::type, Device, Index, MatrixType, SegmentsViewTemplate >; + using RowView = SparseMatrixRowView< SegmentViewType, ValuesViewType, ColumnsIndexesViewType, isBinary() >; + + // TODO: remove this - it is here only for compatibility with original matrix implementation + typedef Containers::Vector< IndexType, DeviceType, IndexType > CompressedRowLengthsVector; + typedef Containers::VectorView< IndexType, DeviceType, IndexType > CompressedRowLengthsVectorView; + typedef typename CompressedRowLengthsVectorView::ConstViewType ConstCompressedRowLengthsVectorView; + + __cuda_callable__ + SparseMatrixView(); + + __cuda_callable__ + SparseMatrixView( const IndexType rows, + const IndexType columns, + const ValuesViewType& values, + const ColumnsIndexesViewType& columnIndexes, + const SegmentsViewType& segments ); + + __cuda_callable__ + SparseMatrixView( const SparseMatrixView& m ) = default; + + //__cuda_callable__ + //SparseMatrixView( const SparseMatrixView&& m ) = default; + + __cuda_callable__ + ViewType getView(); + + __cuda_callable__ + ConstViewType getConstView() const; + + static String getSerializationType(); + + virtual String getSerializationTypeVirtual() const; + + template< typename Vector 
> + void getCompressedRowLengths( Vector& rowLengths ) const; + + [[deprecated]] + IndexType getRowLength( const IndexType row ) const; + + IndexType getNumberOfNonzeroMatrixElements() const; + + void reset(); + + __cuda_callable__ + const RowView getRow( const IndexType& rowIdx ) const; + + __cuda_callable__ + RowView getRow( const IndexType& rowIdx ); + + void setElement( const IndexType row, + const IndexType column, + const RealType& value ); + + void addElement( IndexType row, + IndexType column, + const RealType& value, + const RealType& thisElementMultiplicator = 1.0 ); + + RealType getElement( IndexType row, + IndexType column ) const; + + template< typename Vector > + __cuda_callable__ + typename Vector::RealType rowVectorProduct( const IndexType row, + const Vector& vector ) const; + + /*** + * \brief This method computes outVector = matrixMultiplicator * ( *this ) * inVector + inVectorAddition * inVector + */ + template< typename InVector, + typename OutVector > + void vectorProduct( const InVector& inVector, + OutVector& outVector, + const RealType matrixMultiplicator = 1.0, + const RealType outVectorMultiplicator = 0.0 ) const; + + template< typename Fetch, typename Reduce, typename Keep, typename FetchReal > + void rowsReduction( IndexType first, IndexType last, Fetch& fetch, Reduce& reduce, Keep& keep, const FetchReal& zero ) const; + + template< typename Fetch, typename Reduce, typename Keep, typename FetchReal > + void allRowsReduction( Fetch& fetch, Reduce& reduce, Keep& keep, const FetchReal& zero ) const; + + template< typename Function > + void forRows( IndexType first, IndexType last, Function& function ) const; + + template< typename Function > + void forRows( IndexType first, IndexType last, Function& function ); + + template< typename Function > + void forAllRows( Function& function ) const; + + template< typename Function > + void forAllRows( Function& function ); + + template< typename Vector1, typename Vector2 > + bool performSORIteration( const Vector1& b, + const IndexType row, + Vector2& x, + const RealType& omega = 1.0 ) const; + + SparseMatrixView& operator=( const SparseMatrixView& matrix ); + + void save( File& file ) const; + + void save( const String& fileName ) const; + + void print( std::ostream& str ) const; + + __cuda_callable__ + IndexType getPaddingIndex() const; + + protected: + + ColumnsIndexesViewType columnIndexes; + + SegmentsViewType segments; +}; + +} // namespace Conatiners +} // namespace TNL + +#include <TNL/Matrices/SparseMatrixView.hpp> diff --git a/src/TNL/Matrices/SparseMatrixView.hpp b/src/TNL/Matrices/SparseMatrixView.hpp new file mode 100644 index 0000000000000000000000000000000000000000..2bae61f985c312279d60de9f809b71ea3a19629f --- /dev/null +++ b/src/TNL/Matrices/SparseMatrixView.hpp @@ -0,0 +1,699 @@ +/*************************************************************************** + SparseMatrixView.hpp - description + ------------------- + begin : Dec 28, 2019 + copyright : (C) 2019 by Tomas Oberhuber + email : tomas.oberhuber@fjfi.cvut.cz + ***************************************************************************/ + +/* See Copyright Notice in tnl/Copyright */ + +#pragma once + +#include <functional> +#include <TNL/Matrices/SparseMatrixView.h> +#include <TNL/Algorithms/Reduction.h> +#include <TNL/Algorithms/AtomicOperations.h> + +namespace TNL { +namespace Matrices { + +template< typename Real, + typename Device, + typename Index, + typename MatrixType, + template< typename, typename > class SegmentsView > +__cuda_callable__ 
+SparseMatrixView< Real, Device, Index, MatrixType, SegmentsView >:: +SparseMatrixView() +{ +} + +template< typename Real, + typename Device, + typename Index, + typename MatrixType, + template< typename, typename > class SegmentsView > +__cuda_callable__ +SparseMatrixView< Real, Device, Index, MatrixType, SegmentsView >:: +SparseMatrixView( const IndexType rows, + const IndexType columns, + const ValuesViewType& values, + const ColumnsIndexesViewType& columnIndexes, + const SegmentsViewType& segments ) + : MatrixView< Real, Device, Index >( rows, columns, values ), columnIndexes( columnIndexes ), segments( segments ) +{ +} + +template< typename Real, + typename Device, + typename Index, + typename MatrixType, + template< typename, typename > class SegmentsView > +__cuda_callable__ +auto +SparseMatrixView< Real, Device, Index, MatrixType, SegmentsView >:: +getView() -> ViewType +{ + return ViewType( this->getRows(), + this->getColumns(), + this->getValues().getView(), + this->columnIndexes.getView(), + this->segments.getView() ); +} + +template< typename Real, + typename Device, + typename Index, + typename MatrixType, + template< typename, typename > class SegmentsView > +__cuda_callable__ +auto +SparseMatrixView< Real, Device, Index, MatrixType, SegmentsView >:: +getConstView() const -> ConstViewType +{ + return ConstViewType( this->getRows(), + this->getColumns(), + this->getValues().getConstView(), + this->getColumnsIndexes().getConstView(), + this->segments.getConstView() ); +} + +template< typename Real, + typename Device, + typename Index, + typename MatrixType, + template< typename, typename > class SegmentsView > +String +SparseMatrixView< Real, Device, Index, MatrixType, SegmentsView >:: +getSerializationType() +{ + return String( "Matrices::SparseMatrix< " ) + + TNL::getSerializationType< RealType >() + ", " + + TNL::getSerializationType< SegmentsViewType >() + ", [any_device], " + + TNL::getSerializationType< IndexType >() + ", [any_allocator] >"; +} + +template< typename Real, + typename Device, + typename Index, + typename MatrixType, + template< typename, typename > class SegmentsView > +String +SparseMatrixView< Real, Device, Index, MatrixType, SegmentsView >:: +getSerializationTypeVirtual() const +{ + return this->getSerializationType(); +} + +template< typename Real, + typename Device, + typename Index, + typename MatrixType, + template< typename, typename > class SegmentsView > + template< typename Vector > +void +SparseMatrixView< Real, Device, Index, MatrixType, SegmentsView >:: +getCompressedRowLengths( Vector& rowLengths ) const +{ + rowLengths.setSize( this->getRows() ); + rowLengths = 0; + auto rowLengths_view = rowLengths.getView(); + auto fetch = [] __cuda_callable__ ( IndexType row, IndexType column, IndexType globalIdx, const RealType& value ) -> IndexType { + return ( value != 0.0 ); + }; + auto reduce = [] __cuda_callable__ ( IndexType& aux, const IndexType a ) { + aux += a; + }; + auto keep = [=] __cuda_callable__ ( const IndexType rowIdx, const IndexType value ) mutable { + rowLengths_view[ rowIdx ] = value; + }; + this->allRowsReduction( fetch, reduce, keep, 0 ); +} + +template< typename Real, + typename Device, + typename Index, + typename MatrixType, + template< typename, typename > class SegmentsView > +Index +SparseMatrixView< Real, Device, Index, MatrixType, SegmentsView >:: +getRowLength( const IndexType row ) const +{ + return 0; +} + +template< typename Real, + typename Device, + typename Index, + typename MatrixType, + template< typename, typename 
> class SegmentsView > +Index +SparseMatrixView< Real, Device, Index, MatrixType, SegmentsView >:: +getNumberOfNonzeroMatrixElements() const +{ + const auto columns_view = this->columnIndexes.getConstView(); + const IndexType paddingIndex = this->getPaddingIndex(); + if( ! isSymmetric() ) + { + auto fetch = [=] __cuda_callable__ ( const IndexType i ) -> IndexType { + return ( columns_view[ i ] != paddingIndex ); + }; + return Algorithms::Reduction< DeviceType >::reduce( this->columnIndexes.getSize(), std::plus<>{}, fetch, 0 ); + } + else + { + const auto rows = this->getRows(); + const auto columns = this->getColumns(); + Containers::Vector< IndexType, DeviceType, IndexType > row_sums( this->getRows(), 0 ); + auto row_sums_view = row_sums.getView(); + const auto columnIndexesView = this->columnIndexes.getConstView(); + auto fetch = [=] __cuda_callable__ ( IndexType row, IndexType localIdx, IndexType globalIdx, bool& compute ) -> IndexType { + const IndexType column = columnIndexesView[ globalIdx ]; + compute = ( column != paddingIndex ); + if( ! compute ) + return 0.0; + return 1 + ( column != row && column < rows && row < columns ); // the addition is for non-diagonal elements + }; + auto reduction = [] __cuda_callable__ ( IndexType& sum, const IndexType& value ) { + sum += value; + }; + auto keeper = [=] __cuda_callable__ ( IndexType row, const IndexType& value ) mutable { + row_sums_view[ row ] = value; + }; + this->segments.segmentsReduction( 0, this->getRows(), fetch, reduction, keeper, ( IndexType ) 0 ); + return sum( row_sums ); + } +} + +template< typename Real, + typename Device, + typename Index, + typename MatrixType, + template< typename, typename > class SegmentsView > +__cuda_callable__ auto +SparseMatrixView< Real, Device, Index, MatrixType, SegmentsView >:: +getRow( const IndexType& rowIdx ) const -> const RowView +{ + TNL_ASSERT_LT( rowIdx, this->getRows(), "Row index is larger than number of matrix rows." ); + return RowView( this->segments.getSegmentView( rowIdx ), this->values.getView(), this->columnIndexes.getView() ); +} + +template< typename Real, + typename Device, + typename Index, + typename MatrixType, + template< typename, typename > class SegmentsView > +__cuda_callable__ auto +SparseMatrixView< Real, Device, Index, MatrixType, SegmentsView >:: +getRow( const IndexType& rowIdx ) -> RowView +{ + TNL_ASSERT_LT( rowIdx, this->getRows(), "Row index is larger than number of matrix rows." ); + return RowView( this->segments.getSegmentView( rowIdx ), this->values.getView(), this->columnIndexes.getView() ); +} + +template< typename Real, + typename Device, + typename Index, + typename MatrixType, + template< typename, typename > class SegmentsView > +void +SparseMatrixView< Real, Device, Index, MatrixType, SegmentsView >:: +setElement( const IndexType row, + const IndexType column, + const RealType& value ) +{ + this->addElement( row, column, value, 0.0 ); +} + +template< typename Real, + typename Device, + typename Index, + typename MatrixType, + template< typename, typename > class SegmentsView > +void +SparseMatrixView< Real, Device, Index, MatrixType, SegmentsView >:: +addElement( IndexType row, + IndexType column, + const RealType& value, + const RealType& thisElementMultiplicator ) +{ + TNL_ASSERT_GE( row, 0, "Sparse matrix row index cannot be negative." ); + TNL_ASSERT_LT( row, this->getRows(), "Sparse matrix row index is larger than number of matrix rows." ); + TNL_ASSERT_GE( column, 0, "Sparse matrix column index cannot be negative." 
); + TNL_ASSERT_LT( column, this->getColumns(), "Sparse matrix column index is larger than number of matrix columns." ); + + if( isSymmetric() && row < column ) + { + swap( row, column ); + TNL_ASSERT_LT( row, this->getRows(), "Column index is out of the symmetric part of the matrix after transposition." ); + TNL_ASSERT_LT( column,this->getColumns(), "Row index is out of the symmetric part of the matrix after transposition." ); + } + + const IndexType rowSize = this->segments.getSegmentSize( row ); + IndexType col( this->getPaddingIndex() ); + IndexType i; + IndexType globalIdx; + for( i = 0; i < rowSize; i++ ) + { + globalIdx = this->segments.getGlobalIndex( row, i ); + TNL_ASSERT_LT( globalIdx, this->columnIndexes.getSize(), "" ); + col = this->columnIndexes.getElement( globalIdx ); + if( col == column ) + { + if( ! isBinary() ) + this->values.setElement( globalIdx, thisElementMultiplicator * this->values.getElement( globalIdx ) + value ); + return; + } + if( col == this->getPaddingIndex() || col > column ) + break; + } + if( i == rowSize ) + { + std::stringstream msg; + msg << "The capacity of the sparse matrix row number " << row << " was exceeded."; + throw std::logic_error( msg.str() ); + } + if( col == this->getPaddingIndex() ) + { + this->columnIndexes.setElement( globalIdx, column ); + if( ! isBinary() ) + this->values.setElement( globalIdx, value ); + return; + } + else + { + IndexType j = rowSize - 1; + while( j > i ) + { + const IndexType globalIdx1 = this->segments.getGlobalIndex( row, j ); + const IndexType globalIdx2 = this->segments.getGlobalIndex( row, j - 1 ); + TNL_ASSERT_LT( globalIdx1, this->columnIndexes.getSize(), "" ); + TNL_ASSERT_LT( globalIdx2, this->columnIndexes.getSize(), "" ); + this->columnIndexes.setElement( globalIdx1, this->columnIndexes.getElement( globalIdx2 ) ); + if( ! isBinary() ) + this->values.setElement( globalIdx1, this->values.getElement( globalIdx2 ) ); + j--; + } + + this->columnIndexes.setElement( globalIdx, column ); + if( ! isBinary() ) + this->values.setElement( globalIdx, value ); + return; + } +} + +template< typename Real, + typename Device, + typename Index, + typename MatrixType, + template< typename, typename > class SegmentsView > +Real +SparseMatrixView< Real, Device, Index, MatrixType, SegmentsView >:: +getElement( IndexType row, + IndexType column ) const +{ + TNL_ASSERT_GE( row, 0, "Sparse matrix row index cannot be negative." ); + TNL_ASSERT_LT( row, this->getRows(), "Sparse matrix row index is larger than number of matrix rows." ); + TNL_ASSERT_GE( column, 0, "Sparse matrix column index cannot be negative." ); + TNL_ASSERT_LT( column, this->getColumns(), "Sparse matrix column index is larger than number of matrix columns." 
); + + if( isSymmetric() && row < column ) + { + swap( row, column ); + if( row >= this->getRows() || column >= this->getColumns() ) + return 0.0; + } + + const IndexType rowSize = this->segments.getSegmentSize( row ); + for( IndexType i = 0; i < rowSize; i++ ) + { + const IndexType globalIdx = this->segments.getGlobalIndex( row, i ); + TNL_ASSERT_LT( globalIdx, this->columnIndexes.getSize(), "" ); + const IndexType col = this->columnIndexes.getElement( globalIdx ); + if( col == column ) + { + if( isBinary() ) + return 1; + else + return this->values.getElement( globalIdx ); + } + } + return 0.0; +} + +template< typename Real, + typename Device, + typename Index, + typename MatrixType, + template< typename, typename > class SegmentsView > + template< typename Vector > +__cuda_callable__ +typename Vector::RealType +SparseMatrixView< Real, Device, Index, MatrixType, SegmentsView >:: +rowVectorProduct( const IndexType row, + const Vector& vector ) const +{ + +} + +template< typename Real, + typename Device, + typename Index, + typename MatrixType, + template< typename, typename > class SegmentsView > +template< typename InVector, + typename OutVector > +void +SparseMatrixView< Real, Device, Index, MatrixType, SegmentsView >:: +vectorProduct( const InVector& inVector, + OutVector& outVector, + const RealType matrixMultiplicator, + const RealType outVectorMultiplicator ) const +{ + TNL_ASSERT_EQ( this->getColumns(), inVector.getSize(), "Matrix columns do not fit with input vector." ); + TNL_ASSERT_EQ( this->getRows(), outVector.getSize(), "Matrix rows do not fit with output vector." ); + + const auto inVectorView = inVector.getConstView(); + auto outVectorView = outVector.getView(); + const auto valuesView = this->values.getConstView(); + const auto columnIndexesView = this->columnIndexes.getConstView(); + const IndexType paddingIndex = this->getPaddingIndex(); + if( isSymmetric() ) + outVector *= outVectorMultiplicator; + auto fetch = [=] __cuda_callable__ ( IndexType row, IndexType localIdx, IndexType globalIdx, bool& compute ) mutable -> RealType { + const IndexType column = columnIndexesView[ globalIdx ]; + compute = ( column != paddingIndex ); + if( ! 
compute ) + return 0.0; + if( isSymmetric() && column < row ) + { + if( isBinary() ) + Algorithms::AtomicOperations< DeviceType >::add( outVectorView[ column ], matrixMultiplicator * inVectorView[ row ] ); + else + Algorithms::AtomicOperations< DeviceType >::add( outVectorView[ column ], matrixMultiplicator * valuesView[ globalIdx ] * inVectorView[ row ] ); + } + if( isBinary() ) + return inVectorView[ column ]; + return valuesView[ globalIdx ] * inVectorView[ column ]; + }; + auto reduction = [] __cuda_callable__ ( RealType& sum, const RealType& value ) { + sum += value; + }; + auto keeper = [=] __cuda_callable__ ( IndexType row, const RealType& value ) mutable { + if( isSymmetric() ) + outVectorView[ row ] += matrixMultiplicator * value; + else + { + if( outVectorMultiplicator == 0.0 ) + outVectorView[ row ] = matrixMultiplicator * value; + else + outVectorView[ row ] = outVectorMultiplicator * outVectorView[ row ] + matrixMultiplicator * value; + } + }; + this->segments.segmentsReduction( 0, this->getRows(), fetch, reduction, keeper, ( RealType ) 0.0 ); + + /*const auto inVectorView = inVector.getConstView(); + auto outVectorView = outVector.getView(); + const auto valuesView = this->values.getConstView(); + const auto columnIndexesView = this->columnIndexes.getConstView(); + const IndexType paddingIndex = this->getPaddingIndex(); + auto fetch = [=] __cuda_callable__ ( IndexType row, IndexType offset, bool& compute ) -> RealType { + const IndexType column = columnIndexesView[ offset ]; + compute = ( column != paddingIndex ); + if( ! compute ) + return 0.0; + return valuesView[ offset ] * inVectorView[ column ]; + }; + auto reduction = [] __cuda_callable__ ( RealType& sum, const RealType& value ) { + sum += value; + }; + auto keeper = [=] __cuda_callable__ ( IndexType row, const RealType& value ) mutable { + outVectorView[ row ] = value; + }; + this->segments.segmentsReduction( 0, this->getRows(), fetch, reduction, keeper, ( RealType ) 0.0 ); + */ +} + +template< typename Real, + typename Device, + typename Index, + typename MatrixType, + template< typename, typename > class SegmentsView > + template< typename Fetch, typename Reduce, typename Keep, typename FetchValue > +void +SparseMatrixView< Real, Device, Index, MatrixType, SegmentsView >:: +rowsReduction( IndexType first, IndexType last, Fetch& fetch, Reduce& reduce, Keep& keep, const FetchValue& zero ) const +{ + const auto columns_view = this->columnIndexes.getConstView(); + const auto values_view = this->values.getConstView(); + const IndexType paddingIndex_ = this->getPaddingIndex(); + auto fetch_ = [=] __cuda_callable__ ( IndexType rowIdx, IndexType localIdx, IndexType globalIdx, bool& compute ) mutable -> decltype( fetch( IndexType(), IndexType(), IndexType(), RealType() ) ) { + IndexType columnIdx = columns_view[ globalIdx ]; + if( columnIdx != paddingIndex_ ) + { + if( isBinary() ) + return fetch( rowIdx, columnIdx, globalIdx, 1 ); + else + return fetch( rowIdx, columnIdx, globalIdx, values_view[ globalIdx ] ); + } + return zero; + }; + this->segments.segmentsReduction( first, last, fetch_, reduce, keep, zero ); +} + +template< typename Real, + typename Device, + typename Index, + typename MatrixType, + template< typename, typename > class SegmentsView > + template< typename Fetch, typename Reduce, typename Keep, typename FetchReal > +void +SparseMatrixView< Real, Device, Index, MatrixType, SegmentsView >:: +allRowsReduction( Fetch& fetch, Reduce& reduce, Keep& keep, const FetchReal& zero ) const +{ + this->rowsReduction( 0, 
this->getRows(), fetch, reduce, keep, zero ); +} + +template< typename Real, + typename Device, + typename Index, + typename MatrixType, + template< typename, typename > class SegmentsView > + template< typename Function > +void +SparseMatrixView< Real, Device, Index, MatrixType, SegmentsView >:: +forRows( IndexType first, IndexType last, Function& function ) const +{ + const auto columns_view = this->columnIndexes.getConstView(); + const auto values_view = this->values.getConstView(); + const IndexType paddingIndex_ = this->getPaddingIndex(); + auto f = [=] __cuda_callable__ ( IndexType rowIdx, IndexType localIdx, IndexType globalIdx, bool& compute ) mutable -> bool { + if( isBinary() ) + function( rowIdx, localIdx, columns_view[ globalIdx ], 1, compute ); + else + function( rowIdx, localIdx, columns_view[ globalIdx ], values_view[ globalIdx ], compute ); + return true; + }; + this->segments.forSegments( first, last, f ); +} + +template< typename Real, + typename Device, + typename Index, + typename MatrixType, + template< typename, typename > class SegmentsView > + template< typename Function > +void +SparseMatrixView< Real, Device, Index, MatrixType, SegmentsView >:: +forRows( IndexType first, IndexType last, Function& function ) +{ + auto columns_view = this->columnIndexes.getView(); + auto values_view = this->values.getView(); + const IndexType paddingIndex_ = this->getPaddingIndex(); + auto f = [=] __cuda_callable__ ( IndexType rowIdx, IndexType localIdx, IndexType globalIdx, bool& compute ) mutable { + if( isBinary() ) + { + RealType one( columns_view[ globalIdx ] != paddingIndex_ ); + function( rowIdx, localIdx, columns_view[ globalIdx ], one, compute ); + } + else + function( rowIdx, localIdx, columns_view[ globalIdx ], values_view[ globalIdx ], compute ); + }; + this->segments.forSegments( first, last, f ); +} + +template< typename Real, + typename Device, + typename Index, + typename MatrixType, + template< typename, typename > class SegmentsView > + template< typename Function > +void +SparseMatrixView< Real, Device, Index, MatrixType, SegmentsView >:: +forAllRows( Function& function ) const +{ + this->forRows( 0, this->getRows(), function ); +} + +template< typename Real, + typename Device, + typename Index, + typename MatrixType, + template< typename, typename > class SegmentsView > + template< typename Function > +void +SparseMatrixView< Real, Device, Index, MatrixType, SegmentsView >:: +forAllRows( Function& function ) +{ + this->forRows( 0, this->getRows(), function ); +} + +/*template< typename Real, + template< typename, typename > class SegmentsView, + typename Device, + typename Index, + typename RealAllocator, + typename IndexAllocator > +template< typename Real2, template< typename, typename > class Segments2, typename Index2, typename RealAllocator2, typename IndexAllocator2 > +void +SparseMatrixView< Real, Device, Index, MatrixType, SegmentsView >:: +addMatrix( const SparseMatrixView< Real2, Segments2, Device, Index2, RealAllocator2, IndexAllocator2 >& matrix, + const RealType& matrixMultiplicator, + const RealType& thisMatrixMultiplicator ) +{ + +} + +template< typename Real, + template< typename, typename > class SegmentsView, + typename Device, + typename Index, + typename RealAllocator, + typename IndexAllocator > +template< typename Real2, typename Index2 > +void +SparseMatrixView< Real, Device, Index, MatrixType, SegmentsView >:: +getTransposition( const SparseMatrixView< Real2, Device, Index2 >& matrix, + const RealType& matrixMultiplicator ) +{ + +}*/ + 
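// Editor's note (illustrative sketch, not part of the original patch): the
// fetch/reduce/keep interface of rowsReduction()/allRowsReduction() defined
// above is easiest to see on a small example. The commented sketch below
// computes the sum of the stored values in each row of a sparse matrix; the
// names `matrix` and `rowSums` are hypothetical, `Real`, `Device` and `Index`
// stand for the matrix's RealType, DeviceType and IndexType, and the lambda
// parameter lists follow the pattern used by getCompressedRowLengths() in
// this file.
//
//    Containers::Vector< Real, Device, Index > rowSums( matrix.getRows(), 0 );
//    auto rowSums_view = rowSums.getView();
//    // fetch: map each stored element (row, column, globalIdx, value) to its value
//    auto fetch = [=] __cuda_callable__ ( Index row, Index column, Index globalIdx, const Real& value ) -> Real {
//       return value;
//    };
//    // reduce: accumulate the fetched values within one row
//    auto reduce = [] __cuda_callable__ ( Real& sum, const Real& value ) {
//       sum += value;
//    };
//    // keep: store the per-row result
//    auto keep = [=] __cuda_callable__ ( Index row, const Real& value ) mutable {
//       rowSums_view[ row ] = value;
//    };
//    matrix.allRowsReduction( fetch, reduce, keep, ( Real ) 0.0 );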
+template< typename Real, + typename Device, + typename Index, + typename MatrixType, + template< typename, typename > class SegmentsView > +template< typename Vector1, typename Vector2 > +bool +SparseMatrixView< Real, Device, Index, MatrixType, SegmentsView >:: +performSORIteration( const Vector1& b, + const IndexType row, + Vector2& x, + const RealType& omega ) const +{ + return false; +} + +template< typename Real, + typename Device, + typename Index, + typename MatrixType, + template< typename, typename > class SegmentsView > +SparseMatrixView< Real, Device, Index, MatrixType, SegmentsView >& +SparseMatrixView< Real, Device, Index, MatrixType, SegmentsView >:: +operator=( const SparseMatrixView< Real, Device, Index, MatrixType, SegmentsView >& matrix ) +{ + MatrixView< Real, Device, Index >::operator=( matrix ); + this->columnIndexes.bind( matrix.columnIndexes ); + this->segments = matrix.segments; + return *this; +} + +template< typename Real, + typename Device, + typename Index, + typename MatrixType, + template< typename, typename > class SegmentsView > +void +SparseMatrixView< Real, Device, Index, MatrixType, SegmentsView >:: +save( File& file ) const +{ + MatrixView< RealType, DeviceType, IndexType >::save( file ); + file << this->columnIndexes; + this->segments.save( file ); +} + +template< typename Real, + typename Device, + typename Index, + typename MatrixType, + template< typename, typename > class SegmentsView > +void +SparseMatrixView< Real, Device, Index, MatrixType, SegmentsView >:: +save( const String& fileName ) const +{ + Object::save( fileName ); +} + +template< typename Real, + typename Device, + typename Index, + typename MatrixType, + template< typename, typename > class SegmentsView > +void +SparseMatrixView< Real, Device, Index, MatrixType, SegmentsView >:: +print( std::ostream& str ) const +{ + if( isSymmetric() ) + { + for( IndexType row = 0; row < this->getRows(); row++ ) + { + str <<"Row: " << row << " -> "; + for( IndexType column = 0; column < this->getColumns(); column++ ) + { + auto value = this->getElement( row, column ); + if( value != ( RealType ) 0 ) + str << " Col:" << column << "->" << value << "\t"; + } + str << std::endl; + } + } + else + for( IndexType row = 0; row < this->getRows(); row++ ) + { + str <<"Row: " << row << " -> "; + const auto rowLength = this->segments.getSegmentSize( row ); + for( IndexType i = 0; i < rowLength; i++ ) + { + const IndexType globalIdx = this->segments.getGlobalIndex( row, i ); + const IndexType column = this->columnIndexes.getElement( globalIdx ); + if( column == this->getPaddingIndex() ) + break; + RealType value; + if( isBinary() ) + value = ( RealType ) 1.0; + else + value = this->values.getElement( globalIdx ); + str << " Col:" << column << "->" << value << "\t"; + } + str << std::endl; + } +} + +template< typename Real, + typename Device, + typename Index, + typename MatrixType, + template< typename, typename > class SegmentsView > +__cuda_callable__ +Index +SparseMatrixView< Real, Device, Index, MatrixType, SegmentsView >:: +getPaddingIndex() const +{ + return -1; +} + + } //namespace Matrices +} // namespace TNL diff --git a/src/TNL/Matrices/ThreePartVector.h b/src/TNL/Matrices/ThreePartVector.h index f57e3e116a13e6f01d128284422a08f1c719c27c..f28f544f5bac6eeceb61d01ef49852fd1b36b6af 100644 --- a/src/TNL/Matrices/ThreePartVector.h +++ b/src/TNL/Matrices/ThreePartVector.h @@ -75,6 +75,17 @@ public: return right[ i - left.getSize() - middle.getSize() ]; } + __cuda_callable__ + const Real* getPointer( Index i ) 
const + { + if( i < left.getSize() ) + return &left.getData()[ i ]; + else if( i < left.getSize() + middle.getSize() ) + return &middle.getData()[ i - left.getSize() ]; + else + return &right.getData()[ i - left.getSize() - middle.getSize() ]; + } + friend std::ostream& operator<<( std::ostream& str, const ThreePartVectorView& v ) { str << "[\n\tleft: " << v.left << ",\n\tmiddle: " << v.middle << ",\n\tright: " << v.right << "\n]"; @@ -143,6 +154,17 @@ public: return right[ i - left.getSize() - middle.getSize() ]; } + __cuda_callable__ + const Real* getPointer( Index i ) const + { + if( i < left.getSize() ) + return &left.getData()[ i ]; + else if( i < left.getSize() + middle.getSize() ) + return &middle.getData()[ i - left.getSize() ]; + else + return &right.getData()[ i - left.getSize() - middle.getSize() ]; + } + friend std::ostream& operator<<( std::ostream& str, const ThreePartVector& v ) { str << "[\n\tleft: " << v.left << ",\n\tmiddle: " << v.middle << ",\n\tright: " << v.right << "\n]"; diff --git a/src/TNL/Matrices/Tridiagonal.h b/src/TNL/Matrices/Tridiagonal.h index 3f57fe1c3e6de1cf0e608cd68b5846eb711e321d..0297936810fc61a6b81ac21b03f49a87818eb53c 100644 --- a/src/TNL/Matrices/Tridiagonal.h +++ b/src/TNL/Matrices/Tridiagonal.h @@ -12,198 +12,185 @@ #include <TNL/Matrices/Matrix.h> #include <TNL/Containers/Vector.h> -#include <TNL/Matrices/TridiagonalRow.h> +#include <TNL/Matrices/TridiagonalMatrixRowView.h> +#include <TNL/Containers/Segments/Ellpack.h> +#include <TNL/Matrices/details/TridiagonalMatrixIndexer.h> +#include <TNL/Matrices/TridiagonalMatrixView.h> namespace TNL { -namespace Matrices { - -template< typename Device > -class TridiagonalDeviceDependentCode; +namespace Matrices { template< typename Real = double, typename Device = Devices::Host, - typename Index = int > -class Tridiagonal : public Matrix< Real, Device, Index > + typename Index = int, + bool RowMajorOrder = std::is_same< Device, Devices::Host >::value, + typename RealAllocator = typename Allocators::Default< Device >::template Allocator< Real > > +class Tridiagonal : public Matrix< Real, Device, Index, RealAllocator > { -private: - // convenient template alias for controlling the selection of copy-assignment operator - template< typename Device2 > - using Enabler = std::enable_if< ! 
std::is_same< Device2, Device >::value >; + public: + using RealType = Real; + using DeviceType = Device; + using IndexType = Index; + using RealAllocatorType = RealAllocator; + using BaseType = Matrix< Real, Device, Index, RealAllocator >; + using IndexerType = details::TridiagonalMatrixIndexer< IndexType, RowMajorOrder >; + using ValuesVectorType = typename BaseType::ValuesVectorType; + using ValuesViewType = typename ValuesVectorType::ViewType; + using ViewType = TridiagonalMatrixView< Real, Device, Index, RowMajorOrder >; + using ConstViewType = TridiagonalMatrixView< typename std::add_const< Real >::type, Device, Index, RowMajorOrder >; + using RowView = TridiagonalMatrixRowView< ValuesViewType, IndexerType >; + + // TODO: remove this - it is here only for compatibility with original matrix implementation + typedef Containers::Vector< IndexType, DeviceType, IndexType > CompressedRowLengthsVector; + typedef Containers::VectorView< IndexType, DeviceType, IndexType > CompressedRowLengthsVectorView; + typedef typename CompressedRowLengthsVectorView::ConstViewType ConstCompressedRowLengthsVectorView; + + template< typename _Real = Real, + typename _Device = Device, + typename _Index = Index > + using Self = Tridiagonal< _Real, _Device, _Index >; - // friend class will be needed for templated assignment operators - template< typename Real2, typename Device2, typename Index2 > - friend class Tridiagonal; + static constexpr bool getRowMajorOrder() { return RowMajorOrder; }; -public: - typedef Real RealType; - typedef Device DeviceType; - typedef Index IndexType; - typedef typename Matrix< Real, Device, Index >::CompressedRowLengthsVector CompressedRowLengthsVector; - typedef typename Matrix< Real, Device, Index >::ConstCompressedRowLengthsVectorView ConstCompressedRowLengthsVectorView; - typedef Matrix< Real, Device, Index > BaseType; - typedef TridiagonalRow< Real, Index > MatrixRow; + Tridiagonal(); - template< typename _Real = Real, - typename _Device = Device, - typename _Index = Index > - using Self = Tridiagonal< _Real, _Device, _Index >; + Tridiagonal( const IndexType rows, const IndexType columns ); - Tridiagonal(); + ViewType getView() const; // TODO: remove const - static String getSerializationType(); + //ConstViewType getConstView() const; - virtual String getSerializationTypeVirtual() const; + static String getSerializationType(); - void setDimensions( const IndexType rows, - const IndexType columns ); + virtual String getSerializationTypeVirtual() const; - void setCompressedRowLengths( ConstCompressedRowLengthsVectorView rowLengths ); + void setDimensions( const IndexType rows, + const IndexType columns ); - IndexType getRowLength( const IndexType row ) const; + //template< typename Vector > + void setCompressedRowLengths( const ConstCompressedRowLengthsVectorView rowCapacities ); - __cuda_callable__ - IndexType getRowLengthFast( const IndexType row ) const; + template< typename Vector > + void getCompressedRowLengths( Vector& rowLengths ) const; - IndexType getMaxRowLength() const; + [[deprecated]] + IndexType getRowLength( const IndexType row ) const; - template< typename Real2, typename Device2, typename Index2 > - void setLike( const Tridiagonal< Real2, Device2, Index2 >& m ); + IndexType getMaxRowLength() const; - IndexType getNumberOfMatrixElements() const; + template< typename Real_, typename Device_, typename Index_, bool RowMajorOrder_, typename RealAllocator_ > + void setLike( const Tridiagonal< Real_, Device_, Index_, RowMajorOrder_, RealAllocator_ >& m ); - IndexType 
getNumberOfNonzeroMatrixElements() const; + IndexType getNumberOfNonzeroMatrixElements() const; - IndexType getMaxRowlength() const; + void reset(); - void reset(); + template< typename Real_, typename Device_, typename Index_, bool RowMajorOrder_, typename RealAllocator_ > + bool operator == ( const Tridiagonal< Real_, Device_, Index_, RowMajorOrder_, RealAllocator_ >& matrix ) const; - template< typename Real2, typename Device2, typename Index2 > - bool operator == ( const Tridiagonal< Real2, Device2, Index2 >& matrix ) const; + template< typename Real_, typename Device_, typename Index_, bool RowMajorOrder_, typename RealAllocator_ > + bool operator != ( const Tridiagonal< Real_, Device_, Index_, RowMajorOrder_, RealAllocator_ >& matrix ) const; - template< typename Real2, typename Device2, typename Index2 > - bool operator != ( const Tridiagonal< Real2, Device2, Index2 >& matrix ) const; + __cuda_callable__ + RowView getRow( const IndexType& rowIdx ); - void setValue( const RealType& v ); + __cuda_callable__ + const RowView getRow( const IndexType& rowIdx ) const; - __cuda_callable__ - bool setElementFast( const IndexType row, - const IndexType column, - const RealType& value ); + void setValue( const RealType& v ); - bool setElement( const IndexType row, - const IndexType column, - const RealType& value ); + void setElement( const IndexType row, + const IndexType column, + const RealType& value ); - __cuda_callable__ - bool addElementFast( const IndexType row, - const IndexType column, - const RealType& value, - const RealType& thisElementMultiplicator = 1.0 ); + void addElement( const IndexType row, + const IndexType column, + const RealType& value, + const RealType& thisElementMultiplicator = 1.0 ); - bool addElement( const IndexType row, - const IndexType column, - const RealType& value, - const RealType& thisElementMultiplicator = 1.0 ); + RealType getElement( const IndexType row, + const IndexType column ) const; - __cuda_callable__ - bool setRowFast( const IndexType row, - const IndexType* columns, - const RealType* values, - const IndexType elements ); + template< typename Fetch, typename Reduce, typename Keep, typename FetchReal > + void rowsReduction( IndexType first, IndexType last, Fetch& fetch, Reduce& reduce, Keep& keep, const FetchReal& zero ) const; - bool setRow( const IndexType row, - const IndexType* columns, - const RealType* values, - const IndexType elements ); + template< typename Fetch, typename Reduce, typename Keep, typename FetchReal > + void allRowsReduction( Fetch& fetch, Reduce& reduce, Keep& keep, const FetchReal& zero ) const; - __cuda_callable__ - bool addRowFast( const IndexType row, - const IndexType* columns, - const RealType* values, - const IndexType elements, - const RealType& thisRowMultiplicator = 1.0 ); + template< typename Function > + void forRows( IndexType first, IndexType last, Function& function ) const; - bool addRow( const IndexType row, - const IndexType* columns, - const RealType* values, - const IndexType elements, - const RealType& thisRowMultiplicator = 1.0 ); + template< typename Function > + void forRows( IndexType first, IndexType last, Function& function ); - __cuda_callable__ - RealType getElementFast( const IndexType row, - const IndexType column ) const; + template< typename Function > + void forAllRows( Function& function ) const; - RealType getElement( const IndexType row, - const IndexType column ) const; + template< typename Function > + void forAllRows( Function& function ); - __cuda_callable__ - void getRowFast( const 
IndexType row, - IndexType* columns, - RealType* values ) const; + template< typename Vector > + __cuda_callable__ + typename Vector::RealType rowVectorProduct( const IndexType row, + const Vector& vector ) const; - __cuda_callable__ - MatrixRow getRow( const IndexType rowIndex ); + template< typename InVector, + typename OutVector > + void vectorProduct( const InVector& inVector, + OutVector& outVector ) const; - __cuda_callable__ - const MatrixRow getRow( const IndexType rowIndex ) const; + template< typename Real_, typename Device_, typename Index_, bool RowMajorOrder_, typename RealAllocator_ > + void addMatrix( const Tridiagonal< Real_, Device_, Index_, RowMajorOrder_, RealAllocator_ >& matrix, + const RealType& matrixMultiplicator = 1.0, + const RealType& thisMatrixMultiplicator = 1.0 ); - template< typename Vector > - __cuda_callable__ - typename Vector::RealType rowVectorProduct( const IndexType row, - const Vector& vector ) const; + template< typename Real2, typename Index2 > + void getTransposition( const Tridiagonal< Real2, Device, Index2 >& matrix, + const RealType& matrixMultiplicator = 1.0 ); - template< typename InVector, - typename OutVector > - void vectorProduct( const InVector& inVector, - OutVector& outVector ) const; + template< typename Vector1, typename Vector2 > + __cuda_callable__ + void performSORIteration( const Vector1& b, + const IndexType row, + Vector2& x, + const RealType& omega = 1.0 ) const; - template< typename Real2, typename Index2 > - void addMatrix( const Tridiagonal< Real2, Device, Index2 >& matrix, - const RealType& matrixMultiplicator = 1.0, - const RealType& thisMatrixMultiplicator = 1.0 ); + // copy assignment + Tridiagonal& operator=( const Tridiagonal& matrix ); - template< typename Real2, typename Index2 > - void getTransposition( const Tridiagonal< Real2, Device, Index2 >& matrix, - const RealType& matrixMultiplicator = 1.0 ); + // cross-device copy assignment + template< typename Real_, typename Device_, typename Index_, bool RowMajorOrder_, typename RealAllocator_ > + Tridiagonal& operator=( const Tridiagonal< Real_, Device_, Index_, RowMajorOrder_, RealAllocator_ >& matrix ); - template< typename Vector1, typename Vector2 > - __cuda_callable__ - void performSORIteration( const Vector1& b, - const IndexType row, - Vector2& x, - const RealType& omega = 1.0 ) const; + void save( File& file ) const; - // copy assignment - Tridiagonal& operator=( const Tridiagonal& matrix ); + void load( File& file ); - // cross-device copy assignment - template< typename Real2, typename Device2, typename Index2, - typename = typename Enabler< Device2 >::type > - Tridiagonal& operator=( const Tridiagonal< Real2, Device2, Index2 >& matrix ); + void save( const String& fileName ) const; - void save( File& file ) const; + void load( const String& fileName ); - void load( File& file ); + void print( std::ostream& str ) const; - void save( const String& fileName ) const; + const IndexerType& getIndexer() const; - void load( const String& fileName ); + IndexerType& getIndexer(); - void print( std::ostream& str ) const; + __cuda_callable__ + IndexType getPaddingIndex() const; -protected: + protected: - __cuda_callable__ - IndexType getElementIndex( const IndexType row, - const IndexType column ) const; + __cuda_callable__ + IndexType getElementIndex( const IndexType row, + const IndexType localIdx ) const; - Containers::Vector< RealType, DeviceType, IndexType > values; + IndexerType indexer; - typedef TridiagonalDeviceDependentCode< DeviceType > DeviceDependentCode; - 
friend class TridiagonalDeviceDependentCode< DeviceType >; + ViewType view; }; } // namespace Matrices } // namespace TNL -#include <TNL/Matrices/Tridiagonal_impl.h> +#include <TNL/Matrices/Tridiagonal.hpp> diff --git a/src/TNL/Matrices/Tridiagonal.hpp b/src/TNL/Matrices/Tridiagonal.hpp new file mode 100644 index 0000000000000000000000000000000000000000..3ddabc9852f6eeafe82b034f34917b916cec2ce2 --- /dev/null +++ b/src/TNL/Matrices/Tridiagonal.hpp @@ -0,0 +1,813 @@ +/*************************************************************************** + Tridiagonal.hpp - description + ------------------- + begin : Nov 30, 2013 + copyright : (C) 2013 by Tomas Oberhuber + email : tomas.oberhuber@fjfi.cvut.cz + ***************************************************************************/ + +/* See Copyright Notice in tnl/Copyright */ + +#pragma once + +#include <sstream> +#include <TNL/Assert.h> +#include <TNL/Matrices/Tridiagonal.h> +#include <TNL/Exceptions/NotImplementedError.h> + +namespace TNL { +namespace Matrices { + +template< typename Device > +class TridiagonalDeviceDependentCode; + +template< typename Real, + typename Device, + typename Index, + bool RowMajorOrder, + typename RealAllocator > +Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >:: +Tridiagonal() +{ +} + +template< typename Real, + typename Device, + typename Index, + bool RowMajorOrder, + typename RealAllocator > +Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >:: +Tridiagonal( const IndexType rows, const IndexType columns ) +{ + this->setDimensions( rows, columns ); +} + +template< typename Real, + typename Device, + typename Index, + bool RowMajorOrder, + typename RealAllocator > +auto +Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >:: +getView() const -> ViewType +{ + // TODO: fix when getConstView works + return ViewType( const_cast< Tridiagonal* >( this )->values.getView(), indexer ); +} + +/*template< typename Real, + typename Device, + typename Index, + bool RowMajorOrder, + typename RealAllocator > +auto +Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >:: +getConstView() const -> ConstViewType +{ + return ConstViewType( this->values.getConstView(), indexer ); +}*/ + +template< typename Real, + typename Device, + typename Index, + bool RowMajorOrder, + typename RealAllocator > +String +Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >:: +getSerializationType() +{ + return String( "Matrices::Tridiagonal< " ) + + TNL::getSerializationType< RealType >() + ", [any_device], " + + TNL::getSerializationType< IndexType >() + ", " + + ( RowMajorOrder ? 
"true" : "false" ) + ", [any_allocator] >"; +} + +template< typename Real, + typename Device, + typename Index, + bool RowMajorOrder, + typename RealAllocator > +String +Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >:: +getSerializationTypeVirtual() const +{ + return this->getSerializationType(); +} + +template< typename Real, + typename Device, + typename Index, + bool RowMajorOrder, + typename RealAllocator > +void +Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >:: +setDimensions( const IndexType rows, const IndexType columns ) +{ + Matrix< Real, Device, Index >::setDimensions( rows, columns ); + this->indexer.setDimensions( rows, columns ); + this->values.setSize( this->indexer.getStorageSize() ); + this->values = 0.0; + this->view = this->getView(); +} + +template< typename Real, + typename Device, + typename Index, + bool RowMajorOrder, + typename RealAllocator > + // template< typename Vector > +void +Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >:: +setCompressedRowLengths( const ConstCompressedRowLengthsVectorView rowLengths ) +{ + if( max( rowLengths ) > 3 ) + throw std::logic_error( "Too many non-zero elements per row in a tri-diagonal matrix." ); + if( rowLengths.getElement( 0 ) > 2 ) + throw std::logic_error( "Too many non-zero elements per row in a tri-diagonal matrix." ); + const IndexType diagonalLength = min( this->getRows(), this->getColumns() ); + if( this->getRows() > this->getColumns() ) + if( rowLengths.getElement( this->getRows()-1 ) > 1 ) + throw std::logic_error( "Too many non-zero elements per row in a tri-diagonal matrix." ); + if( this->getRows() == this->getColumns() ) + if( rowLengths.getElement( this->getRows()-1 ) > 2 ) + throw std::logic_error( "Too many non-zero elements per row in a tri-diagonal matrix." ); + if( this->getRows() < this->getColumns() ) + if( rowLengths.getElement( this->getRows()-1 ) > 3 ) + throw std::logic_error( "Too many non-zero elements per row in a tri-diagonal matrix." 
); +} + +template< typename Real, + typename Device, + typename Index, + bool RowMajorOrder, + typename RealAllocator > + template< typename Vector > +void +Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >:: +getCompressedRowLengths( Vector& rowLengths ) const +{ + return this->view.getCompressedRowLengths( rowLengths ); +} + +template< typename Real, + typename Device, + typename Index, + bool RowMajorOrder, + typename RealAllocator > +Index +Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >:: +getRowLength( const IndexType row ) const +{ + return this->view.getRowLength( row ); +} + +template< typename Real, + typename Device, + typename Index, + bool RowMajorOrder, + typename RealAllocator > +Index +Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >:: +getMaxRowLength() const +{ + return this->view.getMaxRowLength(); +} + +template< typename Real, + typename Device, + typename Index, + bool RowMajorOrder, + typename RealAllocator > + template< typename Real_, typename Device_, typename Index_, bool RowMajorOrder_, typename RealAllocator_ > +void +Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >:: +setLike( const Tridiagonal< Real_, Device_, Index_, RowMajorOrder_, RealAllocator_ >& m ) +{ + this->setDimensions( m.getRows(), m.getColumns() ); +} + +template< typename Real, + typename Device, + typename Index, + bool RowMajorOrder, + typename RealAllocator > +Index +Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >:: +getNumberOfNonzeroMatrixElements() const +{ + return this->view.getNumberOfNonzeroMatrixElements(); +} + +template< typename Real, + typename Device, + typename Index, + bool RowMajorOrder, + typename RealAllocator > +void +Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >:: +reset() +{ + Matrix< Real, Device, Index >::reset(); +} + +template< typename Real, + typename Device, + typename Index, + bool RowMajorOrder, + typename RealAllocator > + template< typename Real_, typename Device_, typename Index_, bool RowMajorOrder_, typename RealAllocator_ > +bool +Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >:: +operator == ( const Tridiagonal< Real_, Device_, Index_, RowMajorOrder_, RealAllocator_ >& matrix ) const +{ + if( RowMajorOrder == RowMajorOrder_ ) + return this->values == matrix.values; + else + { + TNL_ASSERT( false, "TODO" ); + } +} + +template< typename Real, + typename Device, + typename Index, + bool RowMajorOrder, + typename RealAllocator > + template< typename Real_, typename Device_, typename Index_, bool RowMajorOrder_, typename RealAllocator_ > +bool +Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >:: +operator != ( const Tridiagonal< Real_, Device_, Index_, RowMajorOrder_, RealAllocator_ >& matrix ) const +{ + return ! 
this->operator==( matrix ); +} + +template< typename Real, + typename Device, + typename Index, + bool RowMajorOrder, + typename RealAllocator > +void +Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >:: +setValue( const RealType& v ) +{ + this->view.setValue( v ); +} + +template< typename Real, + typename Device, + typename Index, + bool RowMajorOrder, + typename RealAllocator > +__cuda_callable__ +auto +Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >:: +getRow( const IndexType& rowIdx ) const -> const RowView +{ + return this->view.getRow( rowIdx ); +} + +template< typename Real, + typename Device, + typename Index, + bool RowMajorOrder, + typename RealAllocator > +__cuda_callable__ +auto +Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >:: +getRow( const IndexType& rowIdx ) -> RowView +{ + return this->view.getRow( rowIdx ); +} + +template< typename Real, + typename Device, + typename Index, + bool RowMajorOrder, + typename RealAllocator > +void +Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >:: +setElement( const IndexType row, const IndexType column, const RealType& value ) +{ + this->view.setElement( row, column, value ); +} + +template< typename Real, + typename Device, + typename Index, + bool RowMajorOrder, + typename RealAllocator > +void +Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >:: +addElement( const IndexType row, + const IndexType column, + const RealType& value, + const RealType& thisElementMultiplicator ) +{ + this->view.addElement( row, column, value, thisElementMultiplicator ); +} + +template< typename Real, + typename Device, + typename Index, + bool RowMajorOrder, + typename RealAllocator > +Real +Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >:: +getElement( const IndexType row, const IndexType column ) const +{ + return this->view.getElement( row, column ); +} + +template< typename Real, + typename Device, + typename Index, + bool RowMajorOrder, + typename RealAllocator > + template< typename Fetch, typename Reduce, typename Keep, typename FetchReal > +void +Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >:: +rowsReduction( IndexType first, IndexType last, Fetch& fetch, Reduce& reduce, Keep& keep, const FetchReal& zero ) const +{ + this->view.rowsReduction( first, last, fetch, reduce, keep, zero ); +} + +template< typename Real, + typename Device, + typename Index, + bool RowMajorOrder, + typename RealAllocator > + template< typename Fetch, typename Reduce, typename Keep, typename FetchReal > +void +Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >:: +allRowsReduction( Fetch& fetch, Reduce& reduce, Keep& keep, const FetchReal& zero ) const +{ + this->view.rowsReduction( 0, this->getRows(), fetch, reduce, keep, zero ); +} + +template< typename Real, + typename Device, + typename Index, + bool RowMajorOrder, + typename RealAllocator > + template< typename Function > +void +Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >:: +forRows( IndexType first, IndexType last, Function& function ) const +{ + this->view.forRows( first, last, function ); +} + +template< typename Real, + typename Device, + typename Index, + bool RowMajorOrder, + typename RealAllocator > + template< typename Function > +void +Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >:: +forRows( IndexType first, IndexType last, Function& function ) +{ + this->view.forRows( first, last, function ); +} + +template< typename Real, + typename Device, + 
typename Index, + bool RowMajorOrder, + typename RealAllocator > + template< typename Function > +void +Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >:: +forAllRows( Function& function ) const +{ + this->view.forRows( 0, this->getRows(), function ); +} + +template< typename Real, + typename Device, + typename Index, + bool RowMajorOrder, + typename RealAllocator > + template< typename Function > +void +Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >:: +forAllRows( Function& function ) +{ + this->view.forRows( 0, this->getRows(), function ); +} + +template< typename Real, + typename Device, + typename Index, + bool RowMajorOrder, + typename RealAllocator > +template< typename Vector > +__cuda_callable__ +typename Vector::RealType +Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >:: +rowVectorProduct( const IndexType row, const Vector& vector ) const +{ + return this->view.rowVectorProduct(); +} + +template< typename Real, + typename Device, + typename Index, + bool RowMajorOrder, + typename RealAllocator > + template< typename InVector, + typename OutVector > +void +Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >:: +vectorProduct( const InVector& inVector, OutVector& outVector ) const +{ + this->view.vectorProduct( inVector, outVector ); +} + +template< typename Real, + typename Device, + typename Index, + bool RowMajorOrder, + typename RealAllocator > + template< typename Real_, typename Device_, typename Index_, bool RowMajorOrder_, typename RealAllocator_ > +void +Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >:: +addMatrix( const Tridiagonal< Real_, Device_, Index_, RowMajorOrder_, RealAllocator_ >& matrix, + const RealType& matrixMultiplicator, + const RealType& thisMatrixMultiplicator ) +{ + this->view.addMatrix( matrix.getView(), matrixMultiplicator, thisMatrixMultiplicator ); +} + +#ifdef HAVE_CUDA +template< typename Real, + typename Real2, + typename Index, + typename Index2 > +__global__ void TridiagonalTranspositionCudaKernel( const Tridiagonal< Real2, Devices::Cuda, Index2 >* inMatrix, + Tridiagonal< Real, Devices::Cuda, Index >* outMatrix, + const Real matrixMultiplicator, + const Index gridIdx ) +{ + const Index rowIdx = ( gridIdx * Cuda::getMaxGridSize() + blockIdx.x ) * blockDim.x + threadIdx.x; + if( rowIdx < inMatrix->getRows() ) + { + if( rowIdx > 0 ) + outMatrix->setElementFast( rowIdx-1, + rowIdx, + matrixMultiplicator * inMatrix->getElementFast( rowIdx, rowIdx-1 ) ); + outMatrix->setElementFast( rowIdx, + rowIdx, + matrixMultiplicator * inMatrix->getElementFast( rowIdx, rowIdx ) ); + if( rowIdx < inMatrix->getRows()-1 ) + outMatrix->setElementFast( rowIdx+1, + rowIdx, + matrixMultiplicator * inMatrix->getElementFast( rowIdx, rowIdx+1 ) ); + } +} +#endif + +template< typename Real, + typename Device, + typename Index, + bool RowMajorOrder, + typename RealAllocator > + template< typename Real2, typename Index2 > +void Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >::getTransposition( const Tridiagonal< Real2, Device, Index2 >& matrix, + const RealType& matrixMultiplicator ) +{ + TNL_ASSERT( this->getRows() == matrix.getRows(), + std::cerr << "This matrix rows: " << this->getRows() << std::endl + << "That matrix rows: " << matrix.getRows() << std::endl ); + if( std::is_same< Device, Devices::Host >::value ) + { + const IndexType& rows = matrix.getRows(); + for( IndexType i = 1; i < rows; i++ ) + { + RealType aux = matrix. 
getElement( i, i - 1 ); + this->setElement( i, i - 1, matrix.getElement( i - 1, i ) ); + this->setElement( i, i, matrix.getElement( i, i ) ); + this->setElement( i - 1, i, aux ); + } + } + if( std::is_same< Device, Devices::Cuda >::value ) + { +#ifdef HAVE_CUDA + Tridiagonal* kernel_this = Cuda::passToDevice( *this ); + typedef Tridiagonal< Real2, Device, Index2 > InMatrixType; + InMatrixType* kernel_inMatrix = Cuda::passToDevice( matrix ); + dim3 cudaBlockSize( 256 ), cudaGridSize( Cuda::getMaxGridSize() ); + const IndexType cudaBlocks = roundUpDivision( matrix.getRows(), cudaBlockSize.x ); + const IndexType cudaGrids = roundUpDivision( cudaBlocks, Cuda::getMaxGridSize() ); + for( IndexType gridIdx = 0; gridIdx < cudaGrids; gridIdx++ ) + { + if( gridIdx == cudaGrids - 1 ) + cudaGridSize.x = cudaBlocks % Cuda::getMaxGridSize(); + TridiagonalTranspositionCudaKernel<<< cudaGridSize, cudaBlockSize >>> + ( kernel_inMatrix, + kernel_this, + matrixMultiplicator, + gridIdx ); + } + Cuda::freeFromDevice( kernel_this ); + Cuda::freeFromDevice( kernel_inMatrix ); + TNL_CHECK_CUDA_DEVICE; +#endif + } +} + +template< typename Real, + typename Device, + typename Index, + bool RowMajorOrder, + typename RealAllocator > + template< typename Vector1, typename Vector2 > +__cuda_callable__ +void Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >::performSORIteration( const Vector1& b, + const IndexType row, + Vector2& x, + const RealType& omega ) const +{ + RealType sum( 0.0 ); + if( row > 0 ) + sum += this->getElementFast( row, row - 1 ) * x[ row - 1 ]; + if( row < this->getColumns() - 1 ) + sum += this->getElementFast( row, row + 1 ) * x[ row + 1 ]; + x[ row ] = ( 1.0 - omega ) * x[ row ] + omega / this->getElementFast( row, row ) * ( b[ row ] - sum ); +} + + +// copy assignment +template< typename Real, + typename Device, + typename Index, + bool RowMajorOrder, + typename RealAllocator > +Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >& +Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >::operator=( const Tridiagonal& matrix ) +{ + this->setLike( matrix ); + this->values = matrix.values; + return *this; +} + +// cross-device copy assignment +template< typename Real, + typename Device, + typename Index, + bool RowMajorOrder, + typename RealAllocator > + template< typename Real_, typename Device_, typename Index_, bool RowMajorOrder_, typename RealAllocator_ > +Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >& +Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >:: +operator=( const Tridiagonal< Real_, Device_, Index_, RowMajorOrder_, RealAllocator_ >& matrix ) +{ + static_assert( std::is_same< Device, Devices::Host >::value || std::is_same< Device, Devices::Cuda >::value, + "unknown device" ); + static_assert( std::is_same< Device_, Devices::Host >::value || std::is_same< Device_, Devices::Cuda >::value, + "unknown device" ); + + this->setLike( matrix ); + if( RowMajorOrder == RowMajorOrder_ ) + this->values = matrix.getValues(); + else + { + if( std::is_same< Device, Device_ >::value ) + { + const auto matrix_view = matrix.getView(); + auto f = [=] __cuda_callable__ ( const IndexType& rowIdx, const IndexType& localIdx, const IndexType& column, Real& value ) mutable { + value = matrix_view.getValues()[ matrix_view.getIndexer().getGlobalIndex( rowIdx, localIdx ) ]; + }; + this->forAllRows( f ); + } + else + { + Tridiagonal< Real, Device, Index, RowMajorOrder_ > auxMatrix; + auxMatrix = matrix; + const auto matrix_view = 
auxMatrix.getView(); + auto f = [=] __cuda_callable__ ( const IndexType& rowIdx, const IndexType& localIdx, const IndexType& column, Real& value ) mutable { + value = matrix_view.getValues()[ matrix_view.getIndexer().getGlobalIndex( rowIdx, localIdx ) ]; + }; + this->forAllRows( f ); + } + } + return *this; +} + +template< typename Real, + typename Device, + typename Index, + bool RowMajorOrder, + typename RealAllocator > +void Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >::save( File& file ) const +{ + Matrix< Real, Device, Index >::save( file ); +} + +template< typename Real, + typename Device, + typename Index, + bool RowMajorOrder, + typename RealAllocator > +void Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >::load( File& file ) +{ + Matrix< Real, Device, Index >::load( file ); + this->indexer.setDimensions( this->getRows(), this->getColumns() ); + this->view = this->getView(); +} + +template< typename Real, + typename Device, + typename Index, + bool RowMajorOrder, + typename RealAllocator > +void Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >::save( const String& fileName ) const +{ + Object::save( fileName ); +} + +template< typename Real, + typename Device, + typename Index, + bool RowMajorOrder, + typename RealAllocator > +void Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >::load( const String& fileName ) +{ + Object::load( fileName ); +} + +template< typename Real, + typename Device, + typename Index, + bool RowMajorOrder, + typename RealAllocator > +void +Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >:: +print( std::ostream& str ) const +{ + this->view.print( str ); +} + +template< typename Real, + typename Device, + typename Index, + bool RowMajorOrder, + typename RealAllocator > +auto +Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >:: +getIndexer() const -> const IndexerType& +{ + return this->indexer; +} + +template< typename Real, + typename Device, + typename Index, + bool RowMajorOrder, + typename RealAllocator > +auto +Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >:: +getIndexer() -> IndexerType& +{ + return this->indexer; +} + +template< typename Real, + typename Device, + typename Index, + bool RowMajorOrder, + typename RealAllocator > +__cuda_callable__ +Index +Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >:: +getElementIndex( const IndexType row, const IndexType column ) const +{ + IndexType localIdx = column - row; + if( row > 0 ) + localIdx++; + + TNL_ASSERT_GE( localIdx, 0, "" ); + TNL_ASSERT_LT( localIdx, 3, "" ); + + return this->indexer.getGlobalIndex( row, localIdx ); +} + +template< typename Real, + typename Device, + typename Index, + bool RowMajorOrder, + typename RealAllocator > +__cuda_callable__ +Index +Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >:: +getPaddingIndex() const +{ + return this->view.getPaddingIndex(); +} + +/* +template<> +class TridiagonalDeviceDependentCode< Devices::Host > +{ + public: + + typedef Devices::Host Device; + + template< typename Index > + __cuda_callable__ + static Index getElementIndex( const Index rows, + const Index row, + const Index column ) + { + return 2*row + column; + } + + template< typename Vector, + typename Index, + typename ValuesType > + __cuda_callable__ + static typename Vector::RealType rowVectorProduct( const Index rows, + const ValuesType& values, + const Index row, + const Vector& vector ) + { + if( row == 0 ) + return vector[ 0 ] * values[ 0 
] + + vector[ 1 ] * values[ 1 ]; + Index i = 3 * row; + if( row == rows - 1 ) + return vector[ row - 1 ] * values[ i - 1 ] + + vector[ row ] * values[ i ]; + return vector[ row - 1 ] * values[ i - 1 ] + + vector[ row ] * values[ i ] + + vector[ row + 1 ] * values[ i + 1 ]; + } + + template< typename Real, + typename Index, + typename InVector, + typename OutVector > + static void vectorProduct( const Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >& matrix, + const InVector& inVector, + OutVector& outVector ) + { +#ifdef HAVE_OPENMP +#pragma omp parallel for if( Devices::Host::isOMPEnabled() ) +#endif + for( Index row = 0; row < matrix.getRows(); row ++ ) + outVector[ row ] = matrix.rowVectorProduct( row, inVector ); + } +}; + +template<> +class TridiagonalDeviceDependentCode< Devices::Cuda > +{ + public: + + typedef Devices::Cuda Device; + + template< typename Index > + __cuda_callable__ + static Index getElementIndex( const Index rows, + const Index row, + const Index column ) + { + return ( column - row + 1 )*rows + row - 1; + } + + template< typename Vector, + typename Index, + typename ValuesType > + __cuda_callable__ + static typename Vector::RealType rowVectorProduct( const Index rows, + const ValuesType& values, + const Index row, + const Vector& vector ) + { + if( row == 0 ) + return vector[ 0 ] * values[ 0 ] + + vector[ 1 ] * values[ rows - 1 ]; + Index i = row - 1; + if( row == rows - 1 ) + return vector[ row - 1 ] * values[ i ] + + vector[ row ] * values[ i + rows ]; + return vector[ row - 1 ] * values[ i ] + + vector[ row ] * values[ i + rows ] + + vector[ row + 1 ] * values[ i + 2*rows ]; + } + + template< typename Real, + typename Index, + typename InVector, + typename OutVector > + static void vectorProduct( const Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >& matrix, + const InVector& inVector, + OutVector& outVector ) + { + MatrixVectorProductCuda( matrix, inVector, outVector ); + } +}; + */ + +} // namespace Matrices +} // namespace TNL diff --git a/src/TNL/Matrices/TridiagonalMatrixRowView.h b/src/TNL/Matrices/TridiagonalMatrixRowView.h new file mode 100644 index 0000000000000000000000000000000000000000..e77d826e052ad5bad9d5dec95dd05059e57afe92 --- /dev/null +++ b/src/TNL/Matrices/TridiagonalMatrixRowView.h @@ -0,0 +1,59 @@ +/*************************************************************************** + TridiagonalMatrixRowView.h - description + ------------------- + begin : Dec 31, 2014 + copyright : (C) 2014 by oberhuber + email : tomas.oberhuber@fjfi.cvut.cz + ***************************************************************************/ + +/* See Copyright Notice in tnl/Copyright */ + +#pragma once + +namespace TNL { +namespace Matrices { + +template< typename ValuesView, + typename Indexer > +class TridiagonalMatrixRowView +{ + public: + + using RealType = typename ValuesView::RealType; + using IndexType = typename ValuesView::IndexType; + using ValuesViewType = ValuesView; + using IndexerType = Indexer; + + __cuda_callable__ + TridiagonalMatrixRowView( const IndexType rowIdx, + const ValuesViewType& values, + const IndexerType& indexer ); + + __cuda_callable__ + IndexType getSize() const; + + __cuda_callable__ + const IndexType getColumnIndex( const IndexType localIdx ) const; + + __cuda_callable__ + const RealType& getValue( const IndexType localIdx ) const; + + __cuda_callable__ + RealType& getValue( const IndexType localIdx ); + + __cuda_callable__ + void setElement( const IndexType localIdx, + const RealType& value ); + 
protected: + + IndexType rowIdx; + + ValuesViewType values; + + Indexer indexer; +}; + +} // namespace Matrices +} // namespace TNL + +#include <TNL/Matrices/TridiagonalMatrixRowView.hpp> diff --git a/src/TNL/Matrices/TridiagonalMatrixRowView.hpp b/src/TNL/Matrices/TridiagonalMatrixRowView.hpp new file mode 100644 index 0000000000000000000000000000000000000000..80fc1a26d52c32b60d1e184ee0beb87ef908c687 --- /dev/null +++ b/src/TNL/Matrices/TridiagonalMatrixRowView.hpp @@ -0,0 +1,75 @@ +/*************************************************************************** + TridiagonalMatrixRowView.hpp - description + ------------------- + begin : Dec 31, 2014 + copyright : (C) 2014 by oberhuber + email : tomas.oberhuber@fjfi.cvut.cz + ***************************************************************************/ + +/* See Copyright Notice in tnl/Copyright */ + +#pragma once + +namespace TNL { +namespace Matrices { + +template< typename ValuesView, typename Indexer > +__cuda_callable__ +TridiagonalMatrixRowView< ValuesView, Indexer >:: +TridiagonalMatrixRowView( const IndexType rowIdx, + const ValuesViewType& values, + const IndexerType& indexer ) +: rowIdx( rowIdx ), values( values ), indexer( indexer ) +{ +} + +template< typename ValuesView, typename Indexer > +__cuda_callable__ +auto +TridiagonalMatrixRowView< ValuesView, Indexer >:: +getSize() const -> IndexType +{ + return indexer.getRowSize( rowIdx ); +} + +template< typename ValuesView, typename Indexer > +__cuda_callable__ +auto +TridiagonalMatrixRowView< ValuesView, Indexer >:: +getColumnIndex( const IndexType localIdx ) const -> const IndexType +{ + TNL_ASSERT_GE( localIdx, 0, "" ); + TNL_ASSERT_LT( localIdx, 3, "" ); + return rowIdx + localIdx - 1; +} + +template< typename ValuesView, typename Indexer > +__cuda_callable__ +auto +TridiagonalMatrixRowView< ValuesView, Indexer >:: +getValue( const IndexType localIdx ) const -> const RealType& +{ + return this->values[ this->indexer.getGlobalIndex( rowIdx, localIdx ) ]; +} + +template< typename ValuesView, typename Indexer > +__cuda_callable__ +auto +TridiagonalMatrixRowView< ValuesView, Indexer >:: +getValue( const IndexType localIdx ) -> RealType& +{ + return this->values[ this->indexer.getGlobalIndex( rowIdx, localIdx ) ]; +} + +template< typename ValuesView, typename Indexer > +__cuda_callable__ +void +TridiagonalMatrixRowView< ValuesView, Indexer >:: +setElement( const IndexType localIdx, + const RealType& value ) +{ + this->values[ indexer.getGlobalIndex( rowIdx, localIdx ) ] = value; +} + +} // namespace Matrices +} // namespace TNL diff --git a/src/TNL/Matrices/TridiagonalMatrixView.h b/src/TNL/Matrices/TridiagonalMatrixView.h new file mode 100644 index 0000000000000000000000000000000000000000..82b76c73f76f2695c5eb3dacdcf685fad4466fc1 --- /dev/null +++ b/src/TNL/Matrices/TridiagonalMatrixView.h @@ -0,0 +1,169 @@ +/*************************************************************************** + TridiagonalMatrixView.h - description + ------------------- + begin : Jan 9, 2020 + copyright : (C) 2020 by Tomas Oberhuber + email : tomas.oberhuber@fjfi.cvut.cz + ***************************************************************************/ + +/* See Copyright Notice in tnl/Copyright */ + +#pragma once + +#include <TNL/Matrices/MatrixView.h> +#include <TNL/Containers/Vector.h> +#include <TNL/Matrices/TridiagonalMatrixRowView.h> +#include <TNL/Containers/Segments/Ellpack.h> +#include <TNL/Matrices/details/TridiagonalMatrixIndexer.h> + +namespace TNL { +namespace Matrices { + +template< typename Real = 
double, + typename Device = Devices::Host, + typename Index = int, + bool RowMajorOrder = std::is_same< Device, Devices::Host >::value > +class TridiagonalMatrixView : public MatrixView< Real, Device, Index > +{ + public: + using RealType = Real; + using DeviceType = Device; + using IndexType = Index; + using BaseType = MatrixView< Real, Device, Index >; + using IndexerType = details::TridiagonalMatrixIndexer< IndexType, RowMajorOrder >; + using ValuesViewType = typename BaseType::ValuesView; + using ViewType = TridiagonalMatrixView< Real, Device, Index, RowMajorOrder >; + using ConstViewType = TridiagonalMatrixView< typename std::add_const< Real >::type, Device, Index, RowMajorOrder >; + using RowView = TridiagonalMatrixRowView< ValuesViewType, IndexerType >; + + // TODO: remove this - it is here only for compatibility with original matrix implementation + typedef Containers::Vector< IndexType, DeviceType, IndexType > CompressedRowLengthsVector; + typedef Containers::VectorView< IndexType, DeviceType, IndexType > CompressedRowLengthsVectorView; + typedef typename CompressedRowLengthsVectorView::ConstViewType ConstCompressedRowLengthsVectorView; + + template< typename _Real = Real, + typename _Device = Device, + typename _Index = Index, + bool RowMajorOrder_ = std::is_same< Device, Devices::Host >::value > + using Self = TridiagonalMatrixView< _Real, _Device, _Index, RowMajorOrder_ >; + + TridiagonalMatrixView(); + + TridiagonalMatrixView( const ValuesViewType& values, const IndexerType& indexer ); + + ViewType getView(); + + ConstViewType getConstView() const; + + static String getSerializationType(); + + virtual String getSerializationTypeVirtual() const; + + template< typename Vector > + void getCompressedRowLengths( Vector& rowLengths ) const; + + [[deprecated]] + IndexType getRowLength( const IndexType row ) const; + + IndexType getMaxRowLength() const; + + IndexType getNumberOfNonzeroMatrixElements() const; + + template< typename Real_, typename Device_, typename Index_, bool RowMajorOrder_ > + bool operator == ( const TridiagonalMatrixView< Real_, Device_, Index_, RowMajorOrder_ >& matrix ) const; + + template< typename Real_, typename Device_, typename Index_, bool RowMajorOrder_ > + bool operator != ( const TridiagonalMatrixView< Real_, Device_, Index_, RowMajorOrder_ >& matrix ) const; + + __cuda_callable__ + RowView getRow( const IndexType& rowIdx ); + + __cuda_callable__ + const RowView getRow( const IndexType& rowIdx ) const; + + void setValue( const RealType& v ); + + void setElement( const IndexType row, + const IndexType column, + const RealType& value ); + + void addElement( const IndexType row, + const IndexType column, + const RealType& value, + const RealType& thisElementMultiplicator = 1.0 ); + + RealType getElement( const IndexType row, + const IndexType column ) const; + + template< typename Fetch, typename Reduce, typename Keep, typename FetchReal > + void rowsReduction( IndexType first, IndexType last, Fetch& fetch, Reduce& reduce, Keep& keep, const FetchReal& zero ) const; + + template< typename Fetch, typename Reduce, typename Keep, typename FetchReal > + void allRowsReduction( Fetch& fetch, Reduce& reduce, Keep& keep, const FetchReal& zero ) const; + + template< typename Function > + void forRows( IndexType first, IndexType last, Function& function ) const; + + template< typename Function > + void forRows( IndexType first, IndexType last, Function& function ); + + template< typename Function > + void forAllRows( Function& function ) const; + + template< typename 
Function > + void forAllRows( Function& function ); + + template< typename Vector > + __cuda_callable__ + typename Vector::RealType rowVectorProduct( const IndexType row, + const Vector& vector ) const; + + template< typename InVector, + typename OutVector > + void vectorProduct( const InVector& inVector, + OutVector& outVector ) const; + + template< typename Real_, typename Device_, typename Index_, bool RowMajorOrder_ > + void addMatrix( const TridiagonalMatrixView< Real_, Device_, Index_, RowMajorOrder_ >& matrix, + const RealType& matrixMultiplicator = 1.0, + const RealType& thisMatrixMultiplicator = 1.0 ); + + template< typename Real2, typename Index2 > + void getTransposition( const TridiagonalMatrixView< Real2, Device, Index2 >& matrix, + const RealType& matrixMultiplicator = 1.0 ); + + template< typename Vector1, typename Vector2 > + __cuda_callable__ + void performSORIteration( const Vector1& b, + const IndexType row, + Vector2& x, + const RealType& omega = 1.0 ) const; + + void save( File& file ) const; + + void save( const String& fileName ) const; + + void print( std::ostream& str ) const; + + __cuda_callable__ + const IndexerType& getIndexer() const; + + __cuda_callable__ + IndexerType& getIndexer(); + + __cuda_callable__ + IndexType getPaddingIndex() const; + + protected: + + __cuda_callable__ + IndexType getElementIndex( const IndexType row, + const IndexType localIdx ) const; + + IndexerType indexer; +}; + +} // namespace Matrices +} // namespace TNL + +#include <TNL/Matrices/TridiagonalMatrixView.hpp> diff --git a/src/TNL/Matrices/TridiagonalMatrixView.hpp b/src/TNL/Matrices/TridiagonalMatrixView.hpp new file mode 100644 index 0000000000000000000000000000000000000000..99e3e87d4ab189e50d26dc363d78313723ea930c --- /dev/null +++ b/src/TNL/Matrices/TridiagonalMatrixView.hpp @@ -0,0 +1,705 @@ +/*************************************************************************** + TridiagonalMatrixView.hpp - description + ------------------- + begin : Jan 9, 2020 + copyright : (C) 2020 by Tomas Oberhuber + email : tomas.oberhuber@fjfi.cvut.cz + ***************************************************************************/ + +/* See Copyright Notice in tnl/Copyright */ + +#pragma once + +#include <TNL/Assert.h> +#include <TNL/Matrices/TridiagonalMatrixView.h> +#include <TNL/Exceptions/NotImplementedError.h> + +namespace TNL { +namespace Matrices { + +template< typename Real, + typename Device, + typename Index, + bool RowMajorOrder > +TridiagonalMatrixView< Real, Device, Index, RowMajorOrder >:: +TridiagonalMatrixView() +{ +} + +template< typename Real, + typename Device, + typename Index, + bool RowMajorOrder > +TridiagonalMatrixView< Real, Device, Index, RowMajorOrder >:: +TridiagonalMatrixView( const ValuesViewType& values, const IndexerType& indexer ) +: MatrixView< Real, Device, Index >( indexer.getRows(), indexer.getColumns(), values ), indexer( indexer ) +{ +} + +template< typename Real, + typename Device, + typename Index, + bool RowMajorOrder > +auto +TridiagonalMatrixView< Real, Device, Index, RowMajorOrder >:: +getView() -> ViewType +{ + return ViewType( this->values.getView(), indexer ); +} + +template< typename Real, + typename Device, + typename Index, + bool RowMajorOrder > +auto +TridiagonalMatrixView< Real, Device, Index, RowMajorOrder >:: +getConstView() const -> ConstViewType +{ + return ConstViewType( this->values.getConstView(), indexer ); +} + +template< typename Real, + typename Device, + typename Index, + bool RowMajorOrder > +String +TridiagonalMatrixView< Real, 
Device, Index, RowMajorOrder >:: +getSerializationType() +{ + return String( "Matrices::Tridiagonal< " ) + + TNL::getSerializationType< RealType >() + ", [any_device], " + + TNL::getSerializationType< IndexType >() + ", " + + ( RowMajorOrder ? "true" : "false" ) + ", [any_allocator] >"; +} + +template< typename Real, + typename Device, + typename Index, + bool RowMajorOrder > +String +TridiagonalMatrixView< Real, Device, Index, RowMajorOrder >:: +getSerializationTypeVirtual() const +{ + return this->getSerializationType(); +} + +template< typename Real, + typename Device, + typename Index, + bool RowMajorOrder > + template< typename Vector > +void +TridiagonalMatrixView< Real, Device, Index, RowMajorOrder >:: +getCompressedRowLengths( Vector& rowLengths ) const +{ + rowLengths.setSize( this->getRows() ); + rowLengths = 0; + auto rowLengths_view = rowLengths.getView(); + auto fetch = [] __cuda_callable__ ( IndexType row, IndexType column, const RealType& value ) -> IndexType { + return ( value != 0.0 ); + }; + auto reduce = [] __cuda_callable__ ( IndexType& aux, const IndexType a ) { + aux += a; + }; + auto keep = [=] __cuda_callable__ ( const IndexType rowIdx, const IndexType value ) mutable { + rowLengths_view[ rowIdx ] = value; + }; + this->allRowsReduction( fetch, reduce, keep, 0 ); +} + + +template< typename Real, + typename Device, + typename Index, + bool RowMajorOrder > +Index +TridiagonalMatrixView< Real, Device, Index, RowMajorOrder >:: +getRowLength( const IndexType row ) const +{ + return this->indexer.getRowSize( row ); +} + +template< typename Real, + typename Device, + typename Index, + bool RowMajorOrder > +Index +TridiagonalMatrixView< Real, Device, Index, RowMajorOrder >:: +getMaxRowLength() const +{ + return 3; +} + +template< typename Real, + typename Device, + typename Index, + bool RowMajorOrder > +Index +TridiagonalMatrixView< Real, Device, Index, RowMajorOrder >:: +getNumberOfNonzeroMatrixElements() const +{ + const auto values_view = this->values.getConstView(); + auto fetch = [=] __cuda_callable__ ( const IndexType i ) -> IndexType { + return ( values_view[ i ] != 0.0 ); + }; + return Algorithms::Reduction< DeviceType >::reduce( this->values.getSize(), std::plus<>{}, fetch, 0 ); +} + +template< typename Real, + typename Device, + typename Index, + bool RowMajorOrder > + template< typename Real_, typename Device_, typename Index_, bool RowMajorOrder_ > +bool +TridiagonalMatrixView< Real, Device, Index, RowMajorOrder >:: +operator == ( const TridiagonalMatrixView< Real_, Device_, Index_, RowMajorOrder_ >& matrix ) const +{ + if( RowMajorOrder == RowMajorOrder_ ) + return this->values == matrix.values; + else + { + TNL_ASSERT( false, "TODO" ); + } +} + +template< typename Real, + typename Device, + typename Index, + bool RowMajorOrder > + template< typename Real_, typename Device_, typename Index_, bool RowMajorOrder_ > +bool +TridiagonalMatrixView< Real, Device, Index, RowMajorOrder >:: +operator != ( const TridiagonalMatrixView< Real_, Device_, Index_, RowMajorOrder_ >& matrix ) const +{ + return ! 
this->operator==( matrix ); +} + +template< typename Real, + typename Device, + typename Index, + bool RowMajorOrder > +void +TridiagonalMatrixView< Real, Device, Index, RowMajorOrder >:: +setValue( const RealType& v ) +{ + this->values = v; +} + +template< typename Real, + typename Device, + typename Index, + bool RowMajorOrder > +__cuda_callable__ +auto +TridiagonalMatrixView< Real, Device, Index, RowMajorOrder >:: +getRow( const IndexType& rowIdx ) const -> const RowView +{ + return RowView( rowIdx, this->values.getView(), this->indexer ); +} + +template< typename Real, + typename Device, + typename Index, + bool RowMajorOrder > +__cuda_callable__ +auto +TridiagonalMatrixView< Real, Device, Index, RowMajorOrder >:: +getRow( const IndexType& rowIdx ) -> RowView +{ + return RowView( rowIdx, this->values.getView(), this->indexer ); +} + +template< typename Real, + typename Device, + typename Index, + bool RowMajorOrder > +void +TridiagonalMatrixView< Real, Device, Index, RowMajorOrder >:: +setElement( const IndexType row, const IndexType column, const RealType& value ) +{ + TNL_ASSERT_GE( row, 0, "" ); + TNL_ASSERT_LT( row, this->getRows(), "" ); + TNL_ASSERT_GE( column, 0, "" ); + TNL_ASSERT_LT( column, this->getColumns(), "" ); + if( abs( row - column ) > 1 ) + { + std::stringstream msg; + msg << "Wrong matrix element coordinates ( " << row << ", " << column << " ) in tridiagonal matrix."; + throw std::logic_error( msg.str() ); + } + this->values.setElement( this->getElementIndex( row, column ), value ); +} + +template< typename Real, + typename Device, + typename Index, + bool RowMajorOrder > +void +TridiagonalMatrixView< Real, Device, Index, RowMajorOrder >:: +addElement( const IndexType row, + const IndexType column, + const RealType& value, + const RealType& thisElementMultiplicator ) +{ + TNL_ASSERT_GE( row, 0, "" ); + TNL_ASSERT_LT( row, this->getRows(), "" ); + TNL_ASSERT_GE( column, 0, "" ); + TNL_ASSERT_LT( column, this->getColumns(), "" ); + if( abs( row - column ) > 1 ) + { + std::stringstream msg; + msg << "Wrong matrix element coordinates ( " << row << ", " << column << " ) in tridiagonal matrix."; + throw std::logic_error( msg.str() ); + } + const Index i = this->getElementIndex( row, column ); + this->values.setElement( i, thisElementMultiplicator * this->values.getElement( i ) + value ); +} + +template< typename Real, + typename Device, + typename Index, + bool RowMajorOrder > +Real +TridiagonalMatrixView< Real, Device, Index, RowMajorOrder >:: +getElement( const IndexType row, const IndexType column ) const +{ + TNL_ASSERT_GE( row, 0, "" ); + TNL_ASSERT_LT( row, this->getRows(), "" ); + TNL_ASSERT_GE( column, 0, "" ); + TNL_ASSERT_LT( column, this->getColumns(), "" ); + + if( abs( column - row ) > 1 ) + return 0.0; + return this->values.getElement( this->getElementIndex( row, column ) ); +} + +template< typename Real, + typename Device, + typename Index, + bool RowMajorOrder > + template< typename Fetch, typename Reduce, typename Keep, typename FetchReal > +void +TridiagonalMatrixView< Real, Device, Index, RowMajorOrder >:: +rowsReduction( IndexType first, IndexType last, Fetch& fetch, Reduce& reduce, Keep& keep, const FetchReal& zero_ ) const +{ + using Real_ = decltype( fetch( IndexType(), IndexType(), RealType() ) ); + const auto values_view = this->values.getConstView(); + const auto indexer = this->indexer; + const auto zero = zero_; + auto f = [=] __cuda_callable__ ( IndexType rowIdx ) mutable { + Real_ sum( zero ); + if( rowIdx == 0 ) + { + reduce( sum, fetch( 0, 
0, values_view[ indexer.getGlobalIndex( 0, 0 ) ] ) ); + reduce( sum, fetch( 0, 1, values_view[ indexer.getGlobalIndex( 0, 1 ) ] ) ); + keep( 0, sum ); + return; + } + if( rowIdx + 1 < indexer.getColumns() ) + { + reduce( sum, fetch( rowIdx, rowIdx - 1, values_view[ indexer.getGlobalIndex( rowIdx, 0 ) ] ) ); + reduce( sum, fetch( rowIdx, rowIdx, values_view[ indexer.getGlobalIndex( rowIdx, 1 ) ] ) ); + reduce( sum, fetch( rowIdx, rowIdx + 1, values_view[ indexer.getGlobalIndex( rowIdx, 2 ) ] ) ); + keep( rowIdx, sum ); + return; + } + if( rowIdx < indexer.getColumns() ) + { + reduce( sum, fetch( rowIdx, rowIdx - 1, values_view[ indexer.getGlobalIndex( rowIdx, 0 ) ] ) ); + reduce( sum, fetch( rowIdx, rowIdx, values_view[ indexer.getGlobalIndex( rowIdx, 1 ) ] ) ); + keep( rowIdx, sum ); + } + else + { + keep( rowIdx, fetch( rowIdx, rowIdx - 1, values_view[ indexer.getGlobalIndex( rowIdx, 0 ) ] ) ); + } + }; + Algorithms::ParallelFor< DeviceType >::exec( first, last, f ); +} + +template< typename Real, + typename Device, + typename Index, + bool RowMajorOrder > + template< typename Fetch, typename Reduce, typename Keep, typename FetchReal > +void +TridiagonalMatrixView< Real, Device, Index, RowMajorOrder >:: +allRowsReduction( Fetch& fetch, Reduce& reduce, Keep& keep, const FetchReal& zero ) const +{ + this->rowsReduction( 0, this->indexer.getNonemptyRowsCount(), fetch, reduce, keep, zero ); +} + +template< typename Real, + typename Device, + typename Index, + bool RowMajorOrder > + template< typename Function > +void +TridiagonalMatrixView< Real, Device, Index, RowMajorOrder >:: +forRows( IndexType first, IndexType last, Function& function ) const +{ + const auto values_view = this->values.getConstView(); + const auto indexer = this->indexer; + bool compute( true ); + auto f = [=] __cuda_callable__ ( IndexType rowIdx ) mutable { + if( rowIdx == 0 ) + { + function( 0, 0, 0, values_view[ indexer.getGlobalIndex( 0, 0 ) ], compute ); + function( 0, 1, 1, values_view[ indexer.getGlobalIndex( 0, 1 ) ], compute ); + } + else if( rowIdx + 1 < indexer.getColumns() ) + { + function( rowIdx, 0, rowIdx - 1, values_view[ indexer.getGlobalIndex( rowIdx, 0 ) ], compute ); + function( rowIdx, 1, rowIdx, values_view[ indexer.getGlobalIndex( rowIdx, 1 ) ], compute ); + function( rowIdx, 2, rowIdx + 1, values_view[ indexer.getGlobalIndex( rowIdx, 2 ) ], compute ); + } + else if( rowIdx < indexer.getColumns() ) + { + function( rowIdx, 0, rowIdx - 1, values_view[ indexer.getGlobalIndex( rowIdx, 0 ) ], compute ); + function( rowIdx, 1, rowIdx, values_view[ indexer.getGlobalIndex( rowIdx, 1 ) ], compute ); + } + else + function( rowIdx, 0, rowIdx, values_view[ indexer.getGlobalIndex( rowIdx, 0 ) ], compute ); + }; + Algorithms::ParallelFor< DeviceType >::exec( first, last, f ); +} + +template< typename Real, + typename Device, + typename Index, + bool RowMajorOrder > + template< typename Function > +void +TridiagonalMatrixView< Real, Device, Index, RowMajorOrder >:: +forRows( IndexType first, IndexType last, Function& function ) +{ + auto values_view = this->values.getView(); + const auto indexer = this->indexer; + auto f = [=] __cuda_callable__ ( IndexType rowIdx ) mutable { + if( rowIdx == 0 ) + { + function( 0, 0, 0, values_view[ indexer.getGlobalIndex( 0, 0 ) ] ); + function( 0, 1, 1, values_view[ indexer.getGlobalIndex( 0, 1 ) ] ); + } + else if( rowIdx + 1 < indexer.getColumns() ) + { + function( rowIdx, 0, rowIdx - 1, values_view[ indexer.getGlobalIndex( rowIdx, 0 ) ] ); + function( rowIdx, 1, rowIdx, 
values_view[ indexer.getGlobalIndex( rowIdx, 1 ) ] );
+         function( rowIdx, 2, rowIdx + 1, values_view[ indexer.getGlobalIndex( rowIdx, 2 ) ] );
+      }
+      else if( rowIdx < indexer.getColumns() )
+      {
+         function( rowIdx, 0, rowIdx - 1, values_view[ indexer.getGlobalIndex( rowIdx, 0 ) ] );
+         function( rowIdx, 1, rowIdx, values_view[ indexer.getGlobalIndex( rowIdx, 1 ) ] );
+      }
+      else
+         function( rowIdx, 0, rowIdx, values_view[ indexer.getGlobalIndex( rowIdx, 0 ) ] );
+   };
+   Algorithms::ParallelFor< DeviceType >::exec( first, last, f );
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          bool RowMajorOrder >
+   template< typename Function >
+void
+TridiagonalMatrixView< Real, Device, Index, RowMajorOrder >::
+forAllRows( Function& function ) const
+{
+   this->forRows( 0, this->indexer.getNonemptyRowsCount(), function );
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          bool RowMajorOrder >
+   template< typename Function >
+void
+TridiagonalMatrixView< Real, Device, Index, RowMajorOrder >::
+forAllRows( Function& function )
+{
+   this->forRows( 0, this->indexer.getNonemptyRowsCount(), function );
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          bool RowMajorOrder >
+template< typename Vector >
+__cuda_callable__
+typename Vector::RealType
+TridiagonalMatrixView< Real, Device, Index, RowMajorOrder >::
+rowVectorProduct( const IndexType row, const Vector& vector ) const
+{
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          bool RowMajorOrder >
+   template< typename InVector,
+             typename OutVector >
+void
+TridiagonalMatrixView< Real, Device, Index, RowMajorOrder >::
+vectorProduct( const InVector& inVector, OutVector& outVector ) const
+{
+   TNL_ASSERT_EQ( this->getColumns(), inVector.getSize(), "Matrix columns do not fit with input vector." );
+   TNL_ASSERT_EQ( this->getRows(), outVector.getSize(), "Matrix rows do not fit with output vector." );
+
+   const auto inVectorView = inVector.getConstView();
+   auto outVectorView = outVector.getView();
+   auto fetch = [=] __cuda_callable__ ( const IndexType& row, const IndexType& column, const RealType& value ) -> RealType {
+      return value * inVectorView[ column ];
+   };
+   auto reduction = [] __cuda_callable__ ( RealType& sum, const RealType& value ) {
+      sum += value;
+   };
+   auto keeper = [=] __cuda_callable__ ( IndexType row, const RealType& value ) mutable {
+      outVectorView[ row ] = value;
+   };
+   this->allRowsReduction( fetch, reduction, keeper, ( RealType ) 0.0 );
+}
+
+template< typename Real,
+          typename Device,
+          typename Index,
+          bool RowMajorOrder >
+   template< typename Real_, typename Device_, typename Index_, bool RowMajorOrder_ >
+void
+TridiagonalMatrixView< Real, Device, Index, RowMajorOrder >::
+addMatrix( const TridiagonalMatrixView< Real_, Device_, Index_, RowMajorOrder_ >& matrix,
+           const RealType& matrixMultiplicator,
+           const RealType& thisMatrixMultiplicator )
+{
+   TNL_ASSERT_EQ( this->getRows(), matrix.getRows(), "Matrices rows are not equal." );
+   TNL_ASSERT_EQ( this->getColumns(), matrix.getColumns(), "Matrices columns are not equal."
); + + if( RowMajorOrder == RowMajorOrder_ ) + { + if( thisMatrixMultiplicator == 1.0 ) + this->values += matrixMultiplicator * matrix.getValues(); + else + this->values = thisMatrixMultiplicator * this->values + matrixMultiplicator * matrix.getValues(); + } + else + { + const auto matrix_view = matrix; + const auto matrixMult = matrixMultiplicator; + const auto thisMult = thisMatrixMultiplicator; + auto add0 = [=] __cuda_callable__ ( const IndexType& rowIdx, const IndexType& localIdx, const IndexType& column, Real& value ) mutable { + value = matrixMult * matrix.getValues()[ matrix.getIndexer().getGlobalIndex( rowIdx, localIdx ) ]; + }; + auto add1 = [=] __cuda_callable__ ( const IndexType& rowIdx, const IndexType& localIdx, const IndexType& column, Real& value ) mutable { + value += matrixMult * matrix.getValues()[ matrix.getIndexer().getGlobalIndex( rowIdx, localIdx ) ]; + }; + auto addGen = [=] __cuda_callable__ ( const IndexType& rowIdx, const IndexType& localIdx, const IndexType& column, Real& value ) mutable { + value = thisMult * value + matrixMult * matrix.getValues()[ matrix.getIndexer().getGlobalIndex( rowIdx, localIdx ) ]; + }; + if( thisMult == 0.0 ) + this->forAllRows( add0 ); + else if( thisMult == 1.0 ) + this->forAllRows( add1 ); + else + this->forAllRows( addGen ); + } +} + +#ifdef HAVE_CUDA +/*template< typename Real, + typename Real2, + typename Index, + typename Index2 > +__global__ void TridiagonalTranspositionCudaKernel( const Tridiagonal< Real2, Devices::Cuda, Index2 >* inMatrix, + Tridiagonal< Real, Devices::Cuda, Index >* outMatrix, + const Real matrixMultiplicator, + const Index gridIdx ) +{ + const Index rowIdx = ( gridIdx * Cuda::getMaxGridSize() + blockIdx.x ) * blockDim.x + threadIdx.x; + if( rowIdx < inMatrix->getRows() ) + { + if( rowIdx > 0 ) + outMatrix->setElementFast( rowIdx-1, + rowIdx, + matrixMultiplicator * inMatrix->getElementFast( rowIdx, rowIdx-1 ) ); + outMatrix->setElementFast( rowIdx, + rowIdx, + matrixMultiplicator * inMatrix->getElementFast( rowIdx, rowIdx ) ); + if( rowIdx < inMatrix->getRows()-1 ) + outMatrix->setElementFast( rowIdx+1, + rowIdx, + matrixMultiplicator * inMatrix->getElementFast( rowIdx, rowIdx+1 ) ); + } +}*/ +#endif + +template< typename Real, + typename Device, + typename Index, + bool RowMajorOrder > + template< typename Real2, typename Index2 > +void +TridiagonalMatrixView< Real, Device, Index, RowMajorOrder >:: +getTransposition( const TridiagonalMatrixView< Real2, Device, Index2 >& matrix, + const RealType& matrixMultiplicator ) +{ + TNL_ASSERT( this->getRows() == matrix.getRows(), + std::cerr << "This matrix rows: " << this->getRows() << std::endl + << "That matrix rows: " << matrix.getRows() << std::endl ); + if( std::is_same< Device, Devices::Host >::value ) + { + const IndexType& rows = matrix.getRows(); + for( IndexType i = 1; i < rows; i++ ) + { + RealType aux = matrix. 
getElement( i, i - 1 ); + this->setElement( i, i - 1, matrix.getElement( i - 1, i ) ); + this->setElement( i, i, matrix.getElement( i, i ) ); + this->setElement( i - 1, i, aux ); + } + } + if( std::is_same< Device, Devices::Cuda >::value ) + { +#ifdef HAVE_CUDA + /*Tridiagonal* kernel_this = Cuda::passToDevice( *this ); + typedef Tridiagonal< Real2, Device, Index2 > InMatrixType; + InMatrixType* kernel_inMatrix = Cuda::passToDevice( matrix ); + dim3 cudaBlockSize( 256 ), cudaGridSize( Cuda::getMaxGridSize() ); + const IndexType cudaBlocks = roundUpDivision( matrix.getRows(), cudaBlockSize.x ); + const IndexType cudaGrids = roundUpDivision( cudaBlocks, Cuda::getMaxGridSize() ); + for( IndexType gridIdx = 0; gridIdx < cudaGrids; gridIdx++ ) + { + if( gridIdx == cudaGrids - 1 ) + cudaGridSize.x = cudaBlocks % Cuda::getMaxGridSize(); + TridiagonalTranspositionCudaKernel<<< cudaGridSize, cudaBlockSize >>> + ( kernel_inMatrix, + kernel_this, + matrixMultiplicator, + gridIdx ); + } + Cuda::freeFromDevice( kernel_this ); + Cuda::freeFromDevice( kernel_inMatrix ); + TNL_CHECK_CUDA_DEVICE;*/ +#endif + } +} + +template< typename Real, + typename Device, + typename Index, + bool RowMajorOrder > + template< typename Vector1, typename Vector2 > +__cuda_callable__ +void +TridiagonalMatrixView< Real, Device, Index, RowMajorOrder >:: +performSORIteration( const Vector1& b, + const IndexType row, + Vector2& x, + const RealType& omega ) const +{ + RealType sum( 0.0 ); + if( row > 0 ) + sum += this->getElementFast( row, row - 1 ) * x[ row - 1 ]; + if( row < this->getColumns() - 1 ) + sum += this->getElementFast( row, row + 1 ) * x[ row + 1 ]; + x[ row ] = ( 1.0 - omega ) * x[ row ] + omega / this->getElementFast( row, row ) * ( b[ row ] - sum ); +} + + +template< typename Real, + typename Device, + typename Index, + bool RowMajorOrder > +void TridiagonalMatrixView< Real, Device, Index, RowMajorOrder >::save( File& file ) const +{ + MatrixView< Real, Device, Index >::save( file ); +} + +template< typename Real, + typename Device, + typename Index, + bool RowMajorOrder > +void +TridiagonalMatrixView< Real, Device, Index, RowMajorOrder >:: +save( const String& fileName ) const +{ + Object::save( fileName ); +} + +template< typename Real, + typename Device, + typename Index, + bool RowMajorOrder > +void TridiagonalMatrixView< Real, Device, Index, RowMajorOrder >::print( std::ostream& str ) const +{ + for( IndexType row = 0; row < this->getRows(); row++ ) + { + str <<"Row: " << row << " -> "; + for( IndexType column = row - 1; column < row + 2; column++ ) + if( column >= 0 && column < this->columns ) + { + auto v = this->getElement( row, column ); + if( v ) + str << " Col:" << column << "->" << v << "\t"; + } + str << std::endl; + } +} + +template< typename Real, + typename Device, + typename Index, + bool RowMajorOrder > +__cuda_callable__ +auto +TridiagonalMatrixView< Real, Device, Index, RowMajorOrder >:: +getIndexer() const -> const IndexerType& +{ + return this->indexer; +} + +template< typename Real, + typename Device, + typename Index, + bool RowMajorOrder > +__cuda_callable__ +auto +TridiagonalMatrixView< Real, Device, Index, RowMajorOrder >:: +getIndexer() -> IndexerType& +{ + return this->indexer; +} + +template< typename Real, + typename Device, + typename Index, + bool RowMajorOrder > +__cuda_callable__ +Index +TridiagonalMatrixView< Real, Device, Index, RowMajorOrder >:: +getElementIndex( const IndexType row, const IndexType column ) const +{ + IndexType localIdx = column - row; + if( row > 0 ) + 
localIdx++; + + TNL_ASSERT_GE( localIdx, 0, "" ); + TNL_ASSERT_LT( localIdx, 3, "" ); + + return this->indexer.getGlobalIndex( row, localIdx ); +} + +template< typename Real, + typename Device, + typename Index, + bool RowMajorOrder > +__cuda_callable__ +Index +TridiagonalMatrixView< Real, Device, Index, RowMajorOrder >:: +getPaddingIndex() const +{ + return -1; +} + + +} // namespace Matrices +} // namespace TNL diff --git a/src/TNL/Matrices/TridiagonalRow.h b/src/TNL/Matrices/TridiagonalRow.h deleted file mode 100644 index 9d06b39e18f8914957852694a6b4fd98d42e0f33..0000000000000000000000000000000000000000 --- a/src/TNL/Matrices/TridiagonalRow.h +++ /dev/null @@ -1,51 +0,0 @@ -/*************************************************************************** - TridiagonalRow.h - description - ------------------- - begin : Dec 31, 2014 - copyright : (C) 2014 by oberhuber - email : tomas.oberhuber@fjfi.cvut.cz - ***************************************************************************/ - -/* See Copyright Notice in tnl/Copyright */ - -#pragma once - -namespace TNL { -namespace Matrices { - -template< typename Real, typename Index > -class TridiagonalRow -{ - public: - - __cuda_callable__ - TridiagonalRow(); - - __cuda_callable__ - TridiagonalRow( Real* values, - const Index row, - const Index columns, - const Index step ); - - __cuda_callable__ - void bind( Real* values, - const Index row, - const Index columns, - const Index step ); - - __cuda_callable__ - void setElement( const Index& elementIndex, - const Index& column, - const Real& value ); - - protected: - - Real* values; - - Index row, columns, step; -}; - -} // namespace Matrices -} // namespace TNL - -#include <TNL/Matrices/TridiagonalRow_impl.h> diff --git a/src/TNL/Matrices/TridiagonalRow_impl.h b/src/TNL/Matrices/TridiagonalRow_impl.h deleted file mode 100644 index f5b7e842a4c4b69c77aa11f2ee09984eb46f9808..0000000000000000000000000000000000000000 --- a/src/TNL/Matrices/TridiagonalRow_impl.h +++ /dev/null @@ -1,78 +0,0 @@ -/*************************************************************************** - TridiagonalRow_impl.h - description - ------------------- - begin : Dec 31, 2014 - copyright : (C) 2014 by oberhuber - email : tomas.oberhuber@fjfi.cvut.cz - ***************************************************************************/ - -/* See Copyright Notice in tnl/Copyright */ - -#pragma once - -namespace TNL { -namespace Matrices { - -template< typename Real, typename Index > -__cuda_callable__ -TridiagonalRow< Real, Index >:: -TridiagonalRow() -: values( 0 ), - row( 0 ), - columns( 0 ), - step( 0 ) -{ -} - -template< typename Real, typename Index > -__cuda_callable__ -TridiagonalRow< Real, Index >:: -TridiagonalRow( Real* values, - const Index row, - const Index columns, - const Index step ) -: values( values ), - row( row ), - columns( columns ), - step( step ) -{ -} - -template< typename Real, typename Index > -__cuda_callable__ -void -TridiagonalRow< Real, Index >:: -bind( Real* values, - const Index row, - const Index columns, - const Index step ) -{ - this->values = values; - this->row = row; - this->columns = columns; - this->step = step; -} - -template< typename Real, typename Index > -__cuda_callable__ -void -TridiagonalRow< Real, Index >:: -setElement( const Index& elementIndex, - const Index& column, - const Real& value ) -{ - TNL_ASSERT( this->values, ); - TNL_ASSERT( this->step > 0,); - TNL_ASSERT( column >= 0 && column < this->columns, - std::cerr << "column = " << columns << " this->columns = " << this->columns ); - 
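The element addressing used by TridiagonalMatrixView::getElementIndex above, combined with the getGlobalIndex of the TridiagonalMatrixIndexer added later in this patch, maps a coordinate pair to one of at most three slots per row and then to a storage offset. A small standalone sketch of that mapping follows; the free functions are written here only to illustrate the layout and are not part of TNL.

// Illustration of the tridiagonal storage mapping (names are not TNL API).
// localIdx 0 is the sub-diagonal slot, 1 the main diagonal, 2 the super-diagonal;
// the first row has no sub-diagonal, hence the +1 shift for row > 0.
inline int tridiagonalLocalIndex( int row, int column )
{
   // valid only for 0 <= column and row - 1 <= column <= row + 1
   int localIdx = column - row;
   if( row > 0 )
      localIdx++;
   return localIdx;
}

inline int tridiagonalGlobalIndex( int rowIdx, int localIdx, int nonemptyRows, bool rowMajorOrder )
{
   if( rowMajorOrder )
      return 3 * rowIdx + localIdx;           // the three slots of a row are adjacent
   return localIdx * nonemptyRows + rowIdx;   // each diagonal is stored contiguously
}

For example, in a 4x4 matrix the element ( 2, 1 ) has localIdx 0 and, with row-major order, lands at offset 3 * 2 + 0 = 6; with column-major order and nonemptyRows = 4 it lands at offset 0 * 4 + 2 = 2.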
TNL_ASSERT( abs( column - row ) <= 1, - std::cerr << "column = " << column << " row = " << row ); - - /**** - * this->values stores an adress of the diagonal element - */ - this->values[ ( column - row ) * this->step ] = value; -} - -} // namespace Matrices -} // namespace TNL diff --git a/src/TNL/Matrices/Tridiagonal_impl.h b/src/TNL/Matrices/Tridiagonal_impl.h deleted file mode 100644 index 2752f6850320035dca48169c5e1ae2806aa47ff5..0000000000000000000000000000000000000000 --- a/src/TNL/Matrices/Tridiagonal_impl.h +++ /dev/null @@ -1,759 +0,0 @@ -/*************************************************************************** - Tridiagonal_impl.h - description - ------------------- - begin : Nov 30, 2013 - copyright : (C) 2013 by Tomas Oberhuber - email : tomas.oberhuber@fjfi.cvut.cz - ***************************************************************************/ - -/* See Copyright Notice in tnl/Copyright */ - -#pragma once - -#include <TNL/Assert.h> -#include <TNL/Matrices/Tridiagonal.h> -#include <TNL/Exceptions/NotImplementedError.h> - -namespace TNL { -namespace Matrices { - -template< typename Device > -class TridiagonalDeviceDependentCode; - -template< typename Real, - typename Device, - typename Index > -Tridiagonal< Real, Device, Index >::Tridiagonal() -{ -} - -template< typename Real, - typename Device, - typename Index > -String Tridiagonal< Real, Device, Index >::getType() -{ - return String( "Matrices::Tridiagonal< " ) + - String( TNL::getType< Real >() ) + - String( ", " ) + - String( Device :: getDeviceType() ) + - String( ", " ) + - String( TNL::getType< Index >() ) + - String( " >" ); -} - -template< typename Real, - typename Device, - typename Index > -String Tridiagonal< Real, Device, Index >::getTypeVirtual() const -{ - return this->getType(); -} - -template< typename Real, - typename Device, - typename Index > -String Tridiagonal< Real, Device, Index >::getSerializationType() -{ - return String( "Matrices::Tridiagonal< " ) + - getType< RealType >() + ", " + - getType< Device >() + ", " + - getType< IndexType >() + " >"; -} - -template< typename Real, - typename Device, - typename Index > -String Tridiagonal< Real, Device, Index >::getSerializationTypeVirtual() const -{ - return this->getSerializationType(); -} - -template< typename Real, - typename Device, - typename Index > -void Tridiagonal< Real, Device, Index >::setDimensions( const IndexType rows, - const IndexType columns ) -{ - Matrix< Real, Device, Index >::setDimensions( rows, columns ); - values.setSize( 3*min( rows, columns ) ); - this->values.setValue( 0.0 ); -} - -template< typename Real, - typename Device, - typename Index > -void Tridiagonal< Real, Device, Index >::setCompressedRowLengths( ConstCompressedRowLengthsVectorView rowLengths ) -{ - if( rowLengths[ 0 ] > 2 ) - throw std::logic_error( "Too many non-zero elements per row in a tri-diagonal matrix." ); - const IndexType diagonalLength = min( this->getRows(), this->getColumns() ); - for( Index i = 1; i < diagonalLength-1; i++ ) - if( rowLengths[ i ] > 3 ) - throw std::logic_error( "Too many non-zero elements per row in a tri-diagonal matrix." ); - if( this->getRows() > this->getColumns() ) - if( rowLengths[ this->getRows()-1 ] > 1 ) - throw std::logic_error( "Too many non-zero elements per row in a tri-diagonal matrix." ); - if( this->getRows() == this->getColumns() ) - if( rowLengths[ this->getRows()-1 ] > 2 ) - throw std::logic_error( "Too many non-zero elements per row in a tri-diagonal matrix." 
); - if( this->getRows() < this->getColumns() ) - if( rowLengths[ this->getRows()-1 ] > 3 ) - throw std::logic_error( "Too many non-zero elements per row in a tri-diagonal matrix." ); -} - -template< typename Real, - typename Device, - typename Index > -Index Tridiagonal< Real, Device, Index >::getRowLength( const IndexType row ) const -{ - return this->getRowLengthFast( row ); -} - -template< typename Real, - typename Device, - typename Index > -__cuda_callable__ -Index Tridiagonal< Real, Device, Index >::getRowLengthFast( const IndexType row ) const -{ - const IndexType diagonalLength = min( this->getRows(), this->getColumns() ); - if( row == 0 ) - return 2; - if( row > 0 && row < diagonalLength - 1 ) - return 3; - if( this->getRows() > this->getColumns() ) - return 1; - if( this->getRows() == this->getColumns() ) - return 2; - return 3; -} - -template< typename Real, - typename Device, - typename Index > -Index Tridiagonal< Real, Device, Index >::getMaxRowLength() const -{ - return 3; -} - -template< typename Real, - typename Device, - typename Index > - template< typename Real2, typename Device2, typename Index2 > -void Tridiagonal< Real, Device, Index >::setLike( const Tridiagonal< Real2, Device2, Index2 >& m ) -{ - this->setDimensions( m.getRows(), m.getColumns() ); -} - -template< typename Real, - typename Device, - typename Index > -Index Tridiagonal< Real, Device, Index >::getNumberOfMatrixElements() const -{ - return 3 * min( this->getRows(), this->getColumns() ); -} - -template< typename Real, - typename Device, - typename Index > -Index Tridiagonal< Real, Device, Index > :: getNumberOfNonzeroMatrixElements() const -{ - IndexType nonzeroElements = 0; - for( IndexType i = 0; i < this->values.getSize(); i++ ) - if( this->values.getElement( i ) != 0 ) - nonzeroElements++; - return nonzeroElements; -} - -template< typename Real, - typename Device, - typename Index > -Index -Tridiagonal< Real, Device, Index >:: -getMaxRowlength() const -{ - return 3; -} - -template< typename Real, - typename Device, - typename Index > -void Tridiagonal< Real, Device, Index >::reset() -{ - Matrix< Real, Device, Index >::reset(); - this->values.reset(); -} - -template< typename Real, - typename Device, - typename Index > - template< typename Real2, typename Device2, typename Index2 > -bool Tridiagonal< Real, Device, Index >::operator == ( const Tridiagonal< Real2, Device2, Index2 >& matrix ) const -{ - return this->values == matrix.values; -} - -template< typename Real, - typename Device, - typename Index > - template< typename Real2, typename Device2, typename Index2 > -bool Tridiagonal< Real, Device, Index >::operator != ( const Tridiagonal< Real2, Device2, Index2 >& matrix ) const -{ - return this->values != matrix.values; -} - -template< typename Real, - typename Device, - typename Index > -void Tridiagonal< Real, Device, Index >::setValue( const RealType& v ) -{ - this->values.setValue( v ); -} - -template< typename Real, - typename Device, - typename Index > -__cuda_callable__ -bool Tridiagonal< Real, Device, Index >::setElementFast( const IndexType row, - const IndexType column, - const RealType& value ) -{ - this->values[ this->getElementIndex( row, column ) ] = value; - return true; -} - -template< typename Real, - typename Device, - typename Index > -bool Tridiagonal< Real, Device, Index >::setElement( const IndexType row, - const IndexType column, - const RealType& value ) -{ - this->values.setElement( this->getElementIndex( row, column ), value ); - return true; -} - -template< typename 
Real, - typename Device, - typename Index > -__cuda_callable__ -bool Tridiagonal< Real, Device, Index >::addElementFast( const IndexType row, - const IndexType column, - const RealType& value, - const RealType& thisElementMultiplicator ) -{ - const Index i = this->getElementIndex( row, column ); - this->values[ i ] = thisElementMultiplicator*this->values[ i ] + value; - return true; -} - -template< typename Real, - typename Device, - typename Index > -bool Tridiagonal< Real, Device, Index >::addElement( const IndexType row, - const IndexType column, - const RealType& value, - const RealType& thisElementMultiplicator ) -{ - const Index i = this->getElementIndex( row, column ); - this->values.setElement( i, thisElementMultiplicator * this->values.getElement( i ) + value ); - return true; -} - -template< typename Real, - typename Device, - typename Index > -__cuda_callable__ -bool Tridiagonal< Real, Device, Index >::setRowFast( const IndexType row, - const IndexType* columns, - const RealType* values, - const IndexType elements ) -{ - TNL_ASSERT( elements <= this->columns, - std::cerr << " elements = " << elements - << " this->columns = " << this->columns ); - return this->addRowFast( row, columns, values, elements, 0.0 ); -} - -template< typename Real, - typename Device, - typename Index > -bool Tridiagonal< Real, Device, Index >::setRow( const IndexType row, - const IndexType* columns, - const RealType* values, - const IndexType elements ) -{ - TNL_ASSERT( elements <= this->columns, - std::cerr << " elements = " << elements - << " this->columns = " << this->columns ); - return this->addRow( row, columns, values, elements, 0.0 ); -} - -template< typename Real, - typename Device, - typename Index > -__cuda_callable__ -bool Tridiagonal< Real, Device, Index >::addRowFast( const IndexType row, - const IndexType* columns, - const RealType* values, - const IndexType elements, - const RealType& thisRowMultiplicator ) -{ - TNL_ASSERT( elements <= this->columns, - std::cerr << " elements = " << elements - << " this->columns = " << this->columns ); - if( elements > 3 ) - return false; - for( IndexType i = 0; i < elements; i++ ) - { - const IndexType& column = columns[ i ]; - if( column < row - 1 || column > row + 1 ) - return false; - addElementFast( row, column, values[ i ], thisRowMultiplicator ); - } - return true; -} - -template< typename Real, - typename Device, - typename Index > -bool Tridiagonal< Real, Device, Index >::addRow( const IndexType row, - const IndexType* columns, - const RealType* values, - const IndexType elements, - const RealType& thisRowMultiplicator ) -{ - TNL_ASSERT( elements <= this->columns, - std::cerr << " elements = " << elements - << " this->columns = " << this->columns ); - if( elements > 3 ) - return false; - for( IndexType i = 0; i < elements; i++ ) - { - const IndexType column = columns[ i ]; - if( column < row - 1 || column > row + 1 ) - return false; - addElement( row, column, values[ i ], thisRowMultiplicator ); - } - return true; -} - -template< typename Real, - typename Device, - typename Index > -__cuda_callable__ -Real Tridiagonal< Real, Device, Index >::getElementFast( const IndexType row, - const IndexType column ) const -{ - if( abs( column - row ) > 1 ) - return 0.0; - return this->values[ this->getElementIndex( row, column ) ]; -} - -template< typename Real, - typename Device, - typename Index > -Real Tridiagonal< Real, Device, Index >::getElement( const IndexType row, - const IndexType column ) const -{ - if( abs( column - row ) > 1 ) - return 0.0; - 
return this->values.getElement( this->getElementIndex( row, column ) ); -} - -template< typename Real, - typename Device, - typename Index > -__cuda_callable__ -void Tridiagonal< Real, Device, Index >::getRowFast( const IndexType row, - IndexType* columns, - RealType* values ) const -{ - IndexType elementPointer( 0 ); - for( IndexType i = -1; i <= 1; i++ ) - { - const IndexType column = row + 1; - if( column >= 0 && column < this->getColumns() ) - { - columns[ elementPointer ] = column; - values[ elementPointer ] = this->values[ this->getElementIndex( row, column ) ]; - elementPointer++; - } - } -} - -template< typename Real, - typename Device, - typename Index > -__cuda_callable__ -typename Tridiagonal< Real, Device, Index >::MatrixRow -Tridiagonal< Real, Device, Index >:: -getRow( const IndexType rowIndex ) -{ - if( std::is_same< Device, Devices::Host >::value ) - return MatrixRow( &this->values.getData()[ this->getElementIndex( rowIndex, rowIndex ) ], - rowIndex, - this->getColumns(), - 1 ); - if( std::is_same< Device, Devices::Cuda >::value ) - return MatrixRow( &this->values.getData()[ this->getElementIndex( rowIndex, rowIndex ) ], - rowIndex, - this->getColumns(), - this->rows ); -} - -template< typename Real, - typename Device, - typename Index > -__cuda_callable__ -const typename Tridiagonal< Real, Device, Index >::MatrixRow -Tridiagonal< Real, Device, Index >:: -getRow( const IndexType rowIndex ) const -{ - throw Exceptions::NotImplementedError(); -} - - -template< typename Real, - typename Device, - typename Index > -template< typename Vector > -__cuda_callable__ -typename Vector::RealType Tridiagonal< Real, Device, Index >::rowVectorProduct( const IndexType row, - const Vector& vector ) const -{ - return TridiagonalDeviceDependentCode< Device >:: - rowVectorProduct( this->rows, - this->values, - row, - vector ); -} - -template< typename Real, - typename Device, - typename Index > - template< typename InVector, - typename OutVector > -void Tridiagonal< Real, Device, Index >::vectorProduct( const InVector& inVector, - OutVector& outVector ) const -{ - TNL_ASSERT( this->getColumns() == inVector.getSize(), - std::cerr << "Matrix columns: " << this->getColumns() << std::endl - << "Vector size: " << inVector.getSize() << std::endl ); - TNL_ASSERT( this->getRows() == outVector.getSize(), - std::cerr << "Matrix rows: " << this->getRows() << std::endl - << "Vector size: " << outVector.getSize() << std::endl ); - - DeviceDependentCode::vectorProduct( *this, inVector, outVector ); -} - -template< typename Real, - typename Device, - typename Index > - template< typename Real2, typename Index2 > -void Tridiagonal< Real, Device, Index >::addMatrix( const Tridiagonal< Real2, Device, Index2 >& matrix, - const RealType& matrixMultiplicator, - const RealType& thisMatrixMultiplicator ) -{ - TNL_ASSERT( this->getRows() == matrix.getRows(), - std::cerr << "This matrix columns: " << this->getColumns() << std::endl - << "This matrix rows: " << this->getRows() << std::endl ); - - if( thisMatrixMultiplicator == 1.0 ) - this->values += matrixMultiplicator * matrix.values; - else - this->values = thisMatrixMultiplicator * this->values + matrixMultiplicator * matrix.values; -} - -#ifdef HAVE_CUDA -template< typename Real, - typename Real2, - typename Index, - typename Index2 > -__global__ void TridiagonalTranspositionCudaKernel( const Tridiagonal< Real2, Devices::Cuda, Index2 >* inMatrix, - Tridiagonal< Real, Devices::Cuda, Index >* outMatrix, - const Real matrixMultiplicator, - const Index gridIdx ) -{ - 
const Index rowIdx = ( gridIdx * Cuda::getMaxGridSize() + blockIdx.x ) * blockDim.x + threadIdx.x; - if( rowIdx < inMatrix->getRows() ) - { - if( rowIdx > 0 ) - outMatrix->setElementFast( rowIdx-1, - rowIdx, - matrixMultiplicator * inMatrix->getElementFast( rowIdx, rowIdx-1 ) ); - outMatrix->setElementFast( rowIdx, - rowIdx, - matrixMultiplicator * inMatrix->getElementFast( rowIdx, rowIdx ) ); - if( rowIdx < inMatrix->getRows()-1 ) - outMatrix->setElementFast( rowIdx+1, - rowIdx, - matrixMultiplicator * inMatrix->getElementFast( rowIdx, rowIdx+1 ) ); - } -} -#endif - -template< typename Real, - typename Device, - typename Index > - template< typename Real2, typename Index2 > -void Tridiagonal< Real, Device, Index >::getTransposition( const Tridiagonal< Real2, Device, Index2 >& matrix, - const RealType& matrixMultiplicator ) -{ - TNL_ASSERT( this->getRows() == matrix.getRows(), - std::cerr << "This matrix rows: " << this->getRows() << std::endl - << "That matrix rows: " << matrix.getRows() << std::endl ); - if( std::is_same< Device, Devices::Host >::value ) - { - const IndexType& rows = matrix.getRows(); - for( IndexType i = 1; i < rows; i++ ) - { - RealType aux = matrix. getElement( i, i - 1 ); - this->setElement( i, i - 1, matrix.getElement( i - 1, i ) ); - this->setElement( i, i, matrix.getElement( i, i ) ); - this->setElement( i - 1, i, aux ); - } - } - if( std::is_same< Device, Devices::Cuda >::value ) - { -#ifdef HAVE_CUDA - Tridiagonal* kernel_this = Cuda::passToDevice( *this ); - typedef Tridiagonal< Real2, Device, Index2 > InMatrixType; - InMatrixType* kernel_inMatrix = Cuda::passToDevice( matrix ); - dim3 cudaBlockSize( 256 ), cudaGridSize( Cuda::getMaxGridSize() ); - const IndexType cudaBlocks = roundUpDivision( matrix.getRows(), cudaBlockSize.x ); - const IndexType cudaGrids = roundUpDivision( cudaBlocks, Cuda::getMaxGridSize() ); - for( IndexType gridIdx = 0; gridIdx < cudaGrids; gridIdx++ ) - { - if( gridIdx == cudaGrids - 1 ) - cudaGridSize.x = cudaBlocks % Cuda::getMaxGridSize(); - TridiagonalTranspositionCudaKernel<<< cudaGridSize, cudaBlockSize >>> - ( kernel_inMatrix, - kernel_this, - matrixMultiplicator, - gridIdx ); - } - Cuda::freeFromDevice( kernel_this ); - Cuda::freeFromDevice( kernel_inMatrix ); - TNL_CHECK_CUDA_DEVICE; -#endif - } -} - -template< typename Real, - typename Device, - typename Index > - template< typename Vector1, typename Vector2 > -__cuda_callable__ -void Tridiagonal< Real, Device, Index >::performSORIteration( const Vector1& b, - const IndexType row, - Vector2& x, - const RealType& omega ) const -{ - RealType sum( 0.0 ); - if( row > 0 ) - sum += this->getElementFast( row, row - 1 ) * x[ row - 1 ]; - if( row < this->getColumns() - 1 ) - sum += this->getElementFast( row, row + 1 ) * x[ row + 1 ]; - x[ row ] = ( 1.0 - omega ) * x[ row ] + omega / this->getElementFast( row, row ) * ( b[ row ] - sum ); -} - - -// copy assignment -template< typename Real, - typename Device, - typename Index > -Tridiagonal< Real, Device, Index >& -Tridiagonal< Real, Device, Index >::operator=( const Tridiagonal& matrix ) -{ - this->setLike( matrix ); - this->values = matrix.values; - return *this; -} - -// cross-device copy assignment -template< typename Real, - typename Device, - typename Index > - template< typename Real2, typename Device2, typename Index2, typename > -Tridiagonal< Real, Device, Index >& -Tridiagonal< Real, Device, Index >::operator=( const Tridiagonal< Real2, Device2, Index2 >& matrix ) -{ - static_assert( std::is_same< Device, Devices::Host >::value 
|| std::is_same< Device, Devices::Cuda >::value, - "unknown device" ); - static_assert( std::is_same< Device2, Devices::Host >::value || std::is_same< Device2, Devices::Cuda >::value, - "unknown device" ); - - this->setLike( matrix ); - - throw Exceptions::NotImplementedError("Cross-device assignment for the Tridiagonal format is not implemented yet."); -} - - -template< typename Real, - typename Device, - typename Index > -void Tridiagonal< Real, Device, Index >::save( File& file ) const -{ - Matrix< Real, Device, Index >::save( file ); - file << this->values; -} - -template< typename Real, - typename Device, - typename Index > -void Tridiagonal< Real, Device, Index >::load( File& file ) -{ - Matrix< Real, Device, Index >::load( file ); - file >> this->values; -} - -template< typename Real, - typename Device, - typename Index > -void Tridiagonal< Real, Device, Index >::save( const String& fileName ) const -{ - Object::save( fileName ); -} - -template< typename Real, - typename Device, - typename Index > -void Tridiagonal< Real, Device, Index >::load( const String& fileName ) -{ - Object::load( fileName ); -} - -template< typename Real, - typename Device, - typename Index > -void Tridiagonal< Real, Device, Index >::print( std::ostream& str ) const -{ - for( IndexType row = 0; row < this->getRows(); row++ ) - { - str <<"Row: " << row << " -> "; - for( IndexType column = row - 1; column < row + 2; column++ ) - if( column >= 0 && column < this->columns ) - str << " Col:" << column << "->" << this->getElement( row, column ) << "\t"; - str << std::endl; - } -} - -template< typename Real, - typename Device, - typename Index > -__cuda_callable__ -Index Tridiagonal< Real, Device, Index >::getElementIndex( const IndexType row, - const IndexType column ) const -{ - TNL_ASSERT( row >= 0 && column >= 0 && row < this->rows && column < this->rows, - std::cerr << " this->rows = " << this->rows - << " row = " << row << " column = " << column ); - TNL_ASSERT( abs( row - column ) < 2, - std::cerr << "row = " << row << " column = " << column << std::endl ); - return TridiagonalDeviceDependentCode< Device >::getElementIndex( this->rows, row, column ); -} - -template<> -class TridiagonalDeviceDependentCode< Devices::Host > -{ - public: - - typedef Devices::Host Device; - - template< typename Index > - __cuda_callable__ - static Index getElementIndex( const Index rows, - const Index row, - const Index column ) - { - return 2*row + column; - } - - template< typename Vector, - typename Index, - typename ValuesType > - __cuda_callable__ - static typename Vector::RealType rowVectorProduct( const Index rows, - const ValuesType& values, - const Index row, - const Vector& vector ) - { - if( row == 0 ) - return vector[ 0 ] * values[ 0 ] + - vector[ 1 ] * values[ 1 ]; - Index i = 3 * row; - if( row == rows - 1 ) - return vector[ row - 1 ] * values[ i - 1 ] + - vector[ row ] * values[ i ]; - return vector[ row - 1 ] * values[ i - 1 ] + - vector[ row ] * values[ i ] + - vector[ row + 1 ] * values[ i + 1 ]; - } - - template< typename Real, - typename Index, - typename InVector, - typename OutVector > - static void vectorProduct( const Tridiagonal< Real, Device, Index >& matrix, - const InVector& inVector, - OutVector& outVector ) - { -#ifdef HAVE_OPENMP -#pragma omp parallel for if( Devices::Host::isOMPEnabled() ) -#endif - for( Index row = 0; row < matrix.getRows(); row ++ ) - outVector[ row ] = matrix.rowVectorProduct( row, inVector ); - } -}; - -template<> -class TridiagonalDeviceDependentCode< Devices::Cuda > -{ - 
public: - - typedef Devices::Cuda Device; - - template< typename Index > - __cuda_callable__ - static Index getElementIndex( const Index rows, - const Index row, - const Index column ) - { - return ( column - row + 1 )*rows + row - 1; - } - - template< typename Vector, - typename Index, - typename ValuesType > - __cuda_callable__ - static typename Vector::RealType rowVectorProduct( const Index rows, - const ValuesType& values, - const Index row, - const Vector& vector ) - { - if( row == 0 ) - return vector[ 0 ] * values[ 0 ] + - vector[ 1 ] * values[ rows - 1 ]; - Index i = row - 1; - if( row == rows - 1 ) - return vector[ row - 1 ] * values[ i ] + - vector[ row ] * values[ i + rows ]; - return vector[ row - 1 ] * values[ i ] + - vector[ row ] * values[ i + rows ] + - vector[ row + 1 ] * values[ i + 2*rows ]; - } - - template< typename Real, - typename Index, - typename InVector, - typename OutVector > - static void vectorProduct( const Tridiagonal< Real, Device, Index >& matrix, - const InVector& inVector, - OutVector& outVector ) - { - MatrixVectorProductCuda( matrix, inVector, outVector ); - } -}; - -} // namespace Matrices -} // namespace TNL diff --git a/src/TNL/Matrices/details/DenseMatrix.h b/src/TNL/Matrices/details/DenseMatrix.h new file mode 100644 index 0000000000000000000000000000000000000000..96930b38660e6a853b59fa27ca59236a04a0397f --- /dev/null +++ b/src/TNL/Matrices/details/DenseMatrix.h @@ -0,0 +1,320 @@ +/*************************************************************************** + DenseMatrix.h - description + ------------------- + begin : Jan 5, 2020 + copyright : (C) 2020 by Tomas Oberhuber + email : tomas.oberhuber@fjfi.cvut.cz + ***************************************************************************/ + +/* See Copyright Notice in tnl/Copyright */ + +#pragma once + +namespace TNL { + namespace Matrices { + namespace details { + +template< typename Device > +class DenseDeviceDependentCode; +template<> +class DenseDeviceDependentCode< Devices::Host > +{ + public: + + typedef Devices::Host Device; + + template< typename Real, + typename Index, + bool RowMajorOrder, + typename RealAllocator, + typename InVector, + typename OutVector > + static void vectorProduct( const DenseMatrixView< Real, Device, Index, RowMajorOrder >& matrix, + const InVector& inVector, + OutVector& outVector ) + { +#ifdef HAVE_OPENMP +#pragma omp parallel for if( Devices::Host::isOMPEnabled() ) +#endif + for( Index row = 0; row < matrix.getRows(); row ++ ) + outVector[ row ] = matrix.rowVectorProduct( row, inVector ); + } +}; + +template<> +class DenseDeviceDependentCode< Devices::Cuda > +{ + public: + + typedef Devices::Cuda Device; + + template< typename Real, + typename Index, + bool RowMajorOrder, + typename RealAllocator, + typename InVector, + typename OutVector > + static void vectorProduct( const DenseMatrixView< Real, Device, Index, RowMajorOrder >& matrix, + const InVector& inVector, + OutVector& outVector ) + { + MatrixVectorProductCuda( matrix, inVector, outVector ); + } +}; + +#ifdef HAVE_CUDA +template< typename Real, + typename Index, + bool RowMajorOrder, + typename RealAllocator, + typename Matrix1, + typename Matrix2, + int tileDim, + int tileRowBlockSize > +__global__ void +DenseMatrixProductKernel( Dense< Real, Devices::Cuda, Index, RowMajorOrder >* resultMatrix, + const Matrix1* matrixA, + const Matrix2* matrixB, + const Real matrixAMultiplicator, + const Real matrixBMultiplicator, + const Index gridIdx_x, + const Index gridIdx_y ) +{ + /**** + * Here we compute product C = 
A * B. To profit from the fast + * shared memory we do it by tiles. + */ + + typedef Index IndexType; + typedef Real RealType; + __shared__ Real tileA[ tileDim*tileDim ]; + __shared__ Real tileB[ tileDim*tileDim ]; + __shared__ Real tileC[ tileDim*tileDim ]; + + const IndexType& matrixARows = matrixA->getRows(); + const IndexType& matrixAColumns = matrixA->getColumns(); + const IndexType& matrixBRows = matrixB->getRows(); + const IndexType& matrixBColumns = matrixB->getColumns(); + + /**** + * Reset the tile C + */ + for( IndexType row = 0; row < tileDim; row += tileRowBlockSize ) + tileC[ ( row + threadIdx.y )*tileDim + threadIdx.x ] = 0.0; + + /**** + * Compute the result tile coordinates + */ + const IndexType resultTileRow = ( gridIdx_y*gridDim.y + blockIdx.y )*tileDim; + const IndexType resultTileColumn = ( gridIdx_x*gridDim.x + blockIdx.x )*tileDim; + + /**** + * Sum over the matrix tiles + */ + for( IndexType i = 0; i < matrixAColumns; i += tileDim ) + { + for( IndexType row = 0; row < tileDim; row += tileRowBlockSize ) + { + const IndexType matrixARow = resultTileRow + threadIdx.y + row; + const IndexType matrixAColumn = i + threadIdx.x; + if( matrixARow < matrixARows && matrixAColumn < matrixAColumns ) + tileA[ (threadIdx.y + row)*tileDim + threadIdx.x ] = + matrixAMultiplicator * matrixA->getElementFast( matrixARow, matrixAColumn ); + + const IndexType matrixBRow = i + threadIdx.y + row; + const IndexType matrixBColumn = resultTileColumn + threadIdx.x; + if( matrixBRow < matrixBRows && matrixBColumn < matrixBColumns ) + tileB[ (threadIdx.y + row)*tileDim + threadIdx.x ] = + matrixBMultiplicator * matrixB->getElementFast( matrixBRow, matrixBColumn ); + } + __syncthreads(); + + const IndexType tileALastRow = tnlCudaMin( tileDim, matrixARows - resultTileRow ); + const IndexType tileALastColumn = tnlCudaMin( tileDim, matrixAColumns - i ); + const IndexType tileBLastRow = tnlCudaMin( tileDim, matrixBRows - i ); + const IndexType tileBLastColumn = + tnlCudaMin( tileDim, matrixBColumns - resultTileColumn ); + + for( IndexType row = 0; row < tileALastRow; row += tileRowBlockSize ) + { + RealType sum( 0.0 ); + for( IndexType j = 0; j < tileALastColumn; j++ ) + sum += tileA[ ( threadIdx.y + row )*tileDim + j ]* + tileB[ j*tileDim + threadIdx.x ]; + tileC[ ( row + threadIdx.y )*tileDim + threadIdx.x ] += sum; + } + __syncthreads(); + } + + /**** + * Write the result tile to the result matrix + */ + const IndexType& matrixCRows = resultMatrix->getRows(); + const IndexType& matrixCColumns = resultMatrix->getColumns(); + for( IndexType row = 0; row < tileDim; row += tileRowBlockSize ) + { + const IndexType matrixCRow = resultTileRow + row + threadIdx.y; + const IndexType matrixCColumn = resultTileColumn + threadIdx.x; + if( matrixCRow < matrixCRows && matrixCColumn < matrixCColumns ) + resultMatrix->setElementFast( matrixCRow, + matrixCColumn, + tileC[ ( row + threadIdx.y )*tileDim + threadIdx.x ] ); + } + +} + +template< typename Real, + typename Index, + typename Matrix, + bool RowMajorOrder, + typename RealAllocator, + int tileDim, + int tileRowBlockSize > +__global__ void DenseTranspositionAlignedKernel( Dense< Real, Devices::Cuda, Index >* resultMatrix, + const Matrix* inputMatrix, + const Real matrixMultiplicator, + const Index gridIdx_x, + const Index gridIdx_y ) +{ + __shared__ Real tile[ tileDim*tileDim ]; + + const Index columns = inputMatrix->getColumns(); + const Index rows = inputMatrix->getRows(); + + + /**** + * Diagonal mapping of the CUDA blocks + */ + Index blockIdx_x, 
blockIdx_y; + if( columns == rows ) + { + blockIdx_y = blockIdx.x; + blockIdx_x = (blockIdx.x+blockIdx.y)%gridDim.x; + } + else + { + Index bID = blockIdx.x + gridDim.x*blockIdx.y; + blockIdx_y = bID % gridDim.y; + blockIdx_x = ( ( bID / gridDim.y ) + blockIdx_y ) % gridDim.x; + } + + /**** + * Read the tile to the shared memory + */ + const Index readRowPosition = + ( gridIdx_y*gridDim.y + blockIdx_y )*tileDim + threadIdx.y; + const Index readColumnPosition = + ( gridIdx_x*gridDim.x + blockIdx_x )*tileDim + threadIdx.x; + for( Index rowBlock = 0; + rowBlock < tileDim; + rowBlock += tileRowBlockSize ) + { + tile[ Cuda::getInterleaving( threadIdx.x*tileDim + threadIdx.y + rowBlock ) ] = + inputMatrix->getElementFast( readColumnPosition, + readRowPosition + rowBlock ); + } + __syncthreads(); + + /**** + * Write the tile to the global memory + */ + const Index writeRowPosition = + ( gridIdx_x*gridDim.x + blockIdx_x )*tileDim + threadIdx.y; + const Index writeColumnPosition = + ( gridIdx_y*gridDim.y + blockIdx_y )*tileDim + threadIdx.x; + for( Index rowBlock = 0; + rowBlock < tileDim; + rowBlock += tileRowBlockSize ) + { + resultMatrix->setElementFast( writeColumnPosition, + writeRowPosition + rowBlock, + matrixMultiplicator * tile[ Cuda::getInterleaving( ( threadIdx.y + rowBlock ) * tileDim + threadIdx.x ) ] ); + + } + +} + +template< typename Real, + typename Index, + bool RowMajorOrder, + typename RealAllocator, + typename Matrix, + int tileDim, + int tileRowBlockSize > +__global__ void DenseTranspositionNonAlignedKernel( Dense< Real, Devices::Cuda, Index >* resultMatrix, + const Matrix* inputMatrix, + const Real matrixMultiplicator, + const Index gridIdx_x, + const Index gridIdx_y ) +{ + __shared__ Real tile[ tileDim*tileDim ]; + + const Index columns = inputMatrix->getColumns(); + const Index rows = inputMatrix->getRows(); + + /**** + * Diagonal mapping of the CUDA blocks + */ + Index blockIdx_x, blockIdx_y; + if( columns == rows ) + { + blockIdx_y = blockIdx.x; + blockIdx_x = (blockIdx.x+blockIdx.y)%gridDim.x; + } + else + { + Index bID = blockIdx.x + gridDim.x*blockIdx.y; + blockIdx_y = bID % gridDim.y; + blockIdx_x = ( ( bID / gridDim.y ) + blockIdx_y ) % gridDim.x; + } + + /**** + * Read the tile to the shared memory + */ + const Index readRowPosition = + ( gridIdx_y*gridDim.y + blockIdx_y )*tileDim + threadIdx.y; + const Index readColumnPosition = + ( gridIdx_x*gridDim.x + blockIdx_x )*tileDim + threadIdx.x; + if( readColumnPosition < columns ) + { + const Index readOffset = readRowPosition * columns + readColumnPosition; + for( Index rowBlock = 0; + rowBlock < tileDim; + rowBlock += tileRowBlockSize ) + { + if( readRowPosition + rowBlock < rows ) + tile[ Cuda::getInterleaving( threadIdx.x*tileDim + threadIdx.y + rowBlock ) ] = + inputMatrix->getElementFast( readColumnPosition, + readRowPosition + rowBlock ); + } + } + __syncthreads(); + + /**** + * Write the tile to the global memory + */ + const Index writeRowPosition = + ( gridIdx_x*gridDim.x + blockIdx_x )*tileDim + threadIdx.y; + const Index writeColumnPosition = + ( gridIdx_y*gridDim.y + blockIdx_y )*tileDim + threadIdx.x; + if( writeColumnPosition < rows ) + { + const Index writeOffset = writeRowPosition * rows + writeColumnPosition; + for( Index rowBlock = 0; + rowBlock < tileDim; + rowBlock += tileRowBlockSize ) + { + if( writeRowPosition + rowBlock < columns ) + resultMatrix->setElementFast( writeColumnPosition, + writeRowPosition + rowBlock, + matrixMultiplicator * tile[ Cuda::getInterleaving( ( threadIdx.y + rowBlock ) * 
tileDim + threadIdx.x ) ] ); + } + } + +} + +#endif + + } //namespace details + } //namepsace Matrices +} //namespace TNL \ No newline at end of file diff --git a/src/TNL/Matrices/details/MultidiagonalMatrixIndexer.h b/src/TNL/Matrices/details/MultidiagonalMatrixIndexer.h new file mode 100644 index 0000000000000000000000000000000000000000..3597c30f7d3eec37ef85b050cb01963e5f34715a --- /dev/null +++ b/src/TNL/Matrices/details/MultidiagonalMatrixIndexer.h @@ -0,0 +1,109 @@ +/*************************************************************************** + MultidiagonalMatrixIndexer.h - description + ------------------- + begin : Jan 11, 2020 + copyright : (C) 2020 by Tomas Oberhuber + email : tomas.oberhuber@fjfi.cvut.cz + ***************************************************************************/ + +/* See Copyright Notice in tnl/Copyright */ + +#pragma once + +namespace TNL { + namespace Matrices { + namespace details { + +template< typename Index, + bool RowMajorOrder > +class MultidiagonalMatrixIndexer +{ + public: + + using IndexType = Index; + + static constexpr bool getRowMajorOrder() { return RowMajorOrder; }; + + __cuda_callable__ + MultidiagonalMatrixIndexer() + : rows( 0 ), columns( 0 ), nonemptyRows( 0 ){}; + + __cuda_callable__ + MultidiagonalMatrixIndexer( const IndexType& rows, + const IndexType& columns, + const IndexType& diagonals, + const IndexType& nonemptyRows ) + : rows( rows ), + columns( columns ), + diagonals( diagonals ), + nonemptyRows( nonemptyRows ) {}; + + __cuda_callable__ + MultidiagonalMatrixIndexer( const MultidiagonalMatrixIndexer& indexer ) + : rows( indexer.rows ), + columns( indexer.columns ), + diagonals( indexer.diagonals ), + nonemptyRows( indexer.nonemptyRows ) {}; + + void set( const IndexType& rows, + const IndexType& columns, + const IndexType& diagonals, + const IndexType& nonemptyRows ) + { + this->rows = rows; + this->columns = columns; + this->diagonals = diagonals; + this->nonemptyRows = nonemptyRows; + }; + + /*__cuda_callable__ + IndexType getRowSize( const IndexType rowIdx ) const + { + if( rowIdx == 0 ) + return 2; + if( columns <= rows ) + { + if( rowIdx == columns - 1 ) + return 2; + if( rowIdx == columns ) + return 1; + } + return 3; + };*/ + + __cuda_callable__ + const IndexType& getRows() const { return this->rows; }; + + __cuda_callable__ + const IndexType& getColumns() const { return this->columns; }; + + __cuda_callable__ + const IndexType& getDiagonals() const { return this->diagonals; }; + + __cuda_callable__ + const IndexType& getNonemptyRowsCount() const { return this->nonemptyRows; }; + + __cuda_callable__ + IndexType getStorageSize() const { return diagonals * this->nonemptyRows; }; + + __cuda_callable__ + IndexType getGlobalIndex( const Index rowIdx, const Index localIdx ) const + { + TNL_ASSERT_GE( localIdx, 0, "" ); + TNL_ASSERT_LT( localIdx, diagonals, "" ); + TNL_ASSERT_GE( rowIdx, 0, "" ); + TNL_ASSERT_LT( rowIdx, this->rows, "" ); + + if( RowMajorOrder ) + return diagonals * rowIdx + localIdx; + else + return localIdx * nonemptyRows + rowIdx; + }; + + protected: + + IndexType rows, columns, diagonals, nonemptyRows; +}; + } //namespace details + } // namespace Materices +} // namespace TNL diff --git a/src/TNL/Matrices/details/TridiagonalMatrixIndexer.h b/src/TNL/Matrices/details/TridiagonalMatrixIndexer.h new file mode 100644 index 0000000000000000000000000000000000000000..64beb44f736152c1ddb57e2b2647e2e1cd8f8870 --- /dev/null +++ b/src/TNL/Matrices/details/TridiagonalMatrixIndexer.h @@ -0,0 +1,92 @@ 
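The DenseMatrixProductKernel above computes C = A * B tile by tile so that each loaded tile of A and B is reused many times while it resides in fast memory. A plain CPU sketch of the same blocking idea, written only as an illustration (single scaling factor, row-major storage, names not taken from TNL):

#include <algorithm>
#include <vector>

// Cache-blocked product C = alpha * A * B for row-major M x K and K x N matrices.
void tiledMatrixProduct( const std::vector< double >& A,
                         const std::vector< double >& B,
                         std::vector< double >& C,
                         int M, int N, int K,
                         double alpha, int tileDim )
{
   std::fill( C.begin(), C.end(), 0.0 );
   for( int i0 = 0; i0 < M; i0 += tileDim )
      for( int j0 = 0; j0 < N; j0 += tileDim )
         for( int k0 = 0; k0 < K; k0 += tileDim )
            // accumulate the contribution of the ( i0, k0 ) tile of A and
            // the ( k0, j0 ) tile of B into the ( i0, j0 ) tile of C
            for( int i = i0; i < std::min( i0 + tileDim, M ); i++ )
               for( int k = k0; k < std::min( k0 + tileDim, K ); k++ )
               {
                  const double a = alpha * A[ i * K + k ];
                  for( int j = j0; j < std::min( j0 + tileDim, N ); j++ )
                     C[ i * N + j ] += a * B[ k * N + j ];
               }
}

On the GPU the same reuse is obtained by staging the tiles in __shared__ arrays and synchronizing the thread block between the load and the partial-product phases, which is what the kernel above does with tileA, tileB and tileC.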
+/*************************************************************************** + TridiagonalMatrixIndexer.h - description + ------------------- + begin : Jan 9, 2020 + copyright : (C) 2020 by Tomas Oberhuber + email : tomas.oberhuber@fjfi.cvut.cz + ***************************************************************************/ + +/* See Copyright Notice in tnl/Copyright */ + +#pragma once + +namespace TNL { + namespace Matrices { + namespace details { + +template< typename Index, + bool RowMajorOrder > +class TridiagonalMatrixIndexer +{ + public: + + using IndexType = Index; + + static constexpr bool getRowMajorOrder() { return RowMajorOrder; }; + + __cuda_callable__ + TridiagonalMatrixIndexer() + : rows( 0 ), columns( 0 ), nonemptyRows( 0 ){}; + + __cuda_callable__ + TridiagonalMatrixIndexer( const IndexType& rows, const IndexType& columns ) + : rows( rows ), columns( columns ), nonemptyRows( TNL::min( rows, columns ) + ( rows > columns ) ) {}; + + __cuda_callable__ + TridiagonalMatrixIndexer( const TridiagonalMatrixIndexer& indexer ) + : rows( indexer.rows ), columns( indexer.columns ), nonemptyRows( indexer.nonemptyRows ) {}; + + void setDimensions( const IndexType& rows, const IndexType& columns ) + { + this->rows = rows; + this->columns = columns; + this->nonemptyRows = min( rows, columns ) + ( rows > columns ); + }; + + __cuda_callable__ + IndexType getRowSize( const IndexType rowIdx ) const + { + if( rowIdx == 0 ) + return 2; + if( columns <= rows ) + { + if( rowIdx == columns - 1 ) + return 2; + if( rowIdx == columns ) + return 1; + } + return 3; + }; + + __cuda_callable__ + const IndexType& getRows() const { return this->rows; }; + + __cuda_callable__ + const IndexType& getColumns() const { return this->columns; }; + + __cuda_callable__ + const IndexType& getNonemptyRowsCount() const { return this->nonemptyRows; }; + __cuda_callable__ + IndexType getStorageSize() const { return 3 * this->nonemptyRows; }; + + __cuda_callable__ + IndexType getGlobalIndex( const Index rowIdx, const Index localIdx ) const + { + TNL_ASSERT_GE( localIdx, 0, "" ); + TNL_ASSERT_LT( localIdx, 3, "" ); + TNL_ASSERT_GE( rowIdx, 0, "" ); + TNL_ASSERT_LT( rowIdx, this->rows, "" ); + + if( RowMajorOrder ) + return 3 * rowIdx + localIdx; + else + return localIdx * nonemptyRows + rowIdx; + }; + + protected: + + IndexType rows, columns, nonemptyRows; +}; + } //namespace details + } // namespace Materices +} // namespace TNL diff --git a/src/TNL/Problems/HeatEquationProblem.h b/src/TNL/Problems/HeatEquationProblem.h index 26df28965ec42e855fd034de7dea748999381e67..6a89742270b7f683764184227020351069059bfa 100644 --- a/src/TNL/Problems/HeatEquationProblem.h +++ b/src/TNL/Problems/HeatEquationProblem.h @@ -18,7 +18,7 @@ #include <TNL/Problems/PDEProblem.h> #include <TNL/Operators/diffusion/LinearDiffusion.h> -#include <TNL/Matrices/Ellpack.h> +#include <TNL/Matrices/Legacy/Ellpack.h> #include <TNL/Functions/MeshFunction.h> #include <TNL/Timer.h> #include <TNL/Solvers/PDE/ExplicitUpdater.h> diff --git a/src/TNL/Problems/HeatEquationProblem_impl.h b/src/TNL/Problems/HeatEquationProblem_impl.h index bc339e9b3ba56eb9e4d3499d4954be57cda7d864..98cd6d5e4f0f74b797fb88b8b88c83079aee76ee 100644 --- a/src/TNL/Problems/HeatEquationProblem_impl.h +++ b/src/TNL/Problems/HeatEquationProblem_impl.h @@ -18,7 +18,7 @@ #include <TNL/FileName.h> #include <TNL/Matrices/MatrixSetter.h> -#include <TNL/Matrices/MultidiagonalMatrixSetter.h> +#include <TNL/Matrices/Legacy/MultidiagonalMatrixSetter.h> #include <TNL/Logger.h> #include 
<TNL/Solvers/PDE/BoundaryConditionsSetter.h> diff --git a/src/TNL/Problems/PDEProblem.h b/src/TNL/Problems/PDEProblem.h index 69d95aaeee7e5db273940602b7f192c0b75b2591..c81ffdd396a6caa3b535b942572c65b65749f94c 100644 --- a/src/TNL/Problems/PDEProblem.h +++ b/src/TNL/Problems/PDEProblem.h @@ -13,7 +13,7 @@ #include <TNL/Problems/Problem.h> #include <TNL/Problems/CommonData.h> #include <TNL/Pointers/SharedPointer.h> -#include <TNL/Matrices/SlicedEllpack.h> +#include <TNL/Matrices/Legacy/SlicedEllpack.h> #include <TNL/Solvers/PDE/TimeDependentPDESolver.h> namespace TNL { diff --git a/src/TNL/Solvers/Linear/Preconditioners/ILU0.h b/src/TNL/Solvers/Linear/Preconditioners/ILU0.h index d5127fab5dec1e67a97d254f57e81f8d49e3d847..1f2b9f1981c837108be68e0e2864c69537afaf2f 100644 --- a/src/TNL/Solvers/Linear/Preconditioners/ILU0.h +++ b/src/TNL/Solvers/Linear/Preconditioners/ILU0.h @@ -15,7 +15,7 @@ #include "Preconditioner.h" #include <TNL/Containers/Vector.h> -#include <TNL/Matrices/CSR.h> +#include <TNL/Matrices/Legacy/CSR.h> #include <TNL/Pointers/UniquePointer.h> #include <TNL/Exceptions/NotImplementedError.h> diff --git a/src/TNL/Solvers/Linear/Preconditioners/ILUT.h b/src/TNL/Solvers/Linear/Preconditioners/ILUT.h index cce3dc5c4bde030dc33c4762623124e1d3f65367..6a4a4a83b52fd393bbde144dc5591c7583f4c1e7 100644 --- a/src/TNL/Solvers/Linear/Preconditioners/ILUT.h +++ b/src/TNL/Solvers/Linear/Preconditioners/ILUT.h @@ -15,7 +15,7 @@ #include "Preconditioner.h" #include <TNL/Containers/Vector.h> -#include <TNL/Matrices/CSR.h> +#include <TNL/Matrices/Legacy/CSR.h> namespace TNL { namespace Solvers { diff --git a/src/TNL/Solvers/SolverConfig_impl.h b/src/TNL/Solvers/SolverConfig_impl.h index 9d3515157feeac58a73b56353274524a56f6ec1a..70e7737eee5292617355f7899ecc526d439de2c0 100644 --- a/src/TNL/Solvers/SolverConfig_impl.h +++ b/src/TNL/Solvers/SolverConfig_impl.h @@ -16,8 +16,7 @@ #include <TNL/Solvers/PDE/ExplicitTimeStepper.h> #include <TNL/Solvers/PDE/TimeDependentPDESolver.h> #include <TNL/Solvers/LinearSolverTypeResolver.h> -#include <TNL/Matrices/CSR.h> -#include <TNL/Meshes/DistributedMeshes/DistributedGrid.h> +#include <TNL/Matrices/Legacy/CSR.h> namespace TNL { namespace Solvers { diff --git a/src/UnitTests/Containers/ArrayTest.h b/src/UnitTests/Containers/ArrayTest.h index ef3119365f50444d89154e487a65a12464849062..255a67fb911b995ea409c341fe9a00104b5d95bf 100644 --- a/src/UnitTests/Containers/ArrayTest.h +++ b/src/UnitTests/Containers/ArrayTest.h @@ -135,6 +135,11 @@ TYPED_TEST( ArrayTest, constructors ) v = 0; EXPECT_EQ( v.getSize(), 10 ); + ArrayType vv( 10, 4 ); + EXPECT_EQ( vv.getSize(), 10 ); + for( int i = 0; i < 10; i++ ) + EXPECT_EQ( vv.getElement( i ), 4 ); + // deep copy ArrayType w( v ); EXPECT_NE( w.getData(), v.getData() ); diff --git a/src/UnitTests/Containers/CMakeLists.txt b/src/UnitTests/Containers/CMakeLists.txt index 9f27aaa86078937540ccc1716069faec0f69b424..227a8655157bbaa0beff7aeac668e592d1f01456 100644 --- a/src/UnitTests/Containers/CMakeLists.txt +++ b/src/UnitTests/Containers/CMakeLists.txt @@ -1,3 +1,5 @@ +ADD_SUBDIRECTORY( Segments ) + ADD_EXECUTABLE( ArrayTest ArrayTest.cpp ) TARGET_COMPILE_OPTIONS( ArrayTest PRIVATE ${CXX_TESTS_FLAGS} ) TARGET_LINK_LIBRARIES( ArrayTest ${GTEST_BOTH_LIBRARIES} ) diff --git a/src/UnitTests/Containers/DistributedArrayTest.h b/src/UnitTests/Containers/DistributedArrayTest.h index f4bd35830369a4251797bb363472a7b51e536878..204bc6fe753c9f75b55bd3523eb1f708faf0b857 100644 --- a/src/UnitTests/Containers/DistributedArrayTest.h +++ 
b/src/UnitTests/Containers/DistributedArrayTest.h @@ -94,7 +94,7 @@ TYPED_TEST( DistributedArrayTest, copyFromGlobal ) ArrayViewType localArrayView = this->distributedArray.getLocalView(); auto globalView = globalArray.getConstView(); const auto localRange = this->distributedArray.getLocalRange(); - globalView.bind( &globalArray[ localRange.getBegin() ], localRange.getEnd() - localRange.getBegin() ); + globalView.bind( &globalArray.getData()[ localRange.getBegin() ], localRange.getEnd() - localRange.getBegin() ); EXPECT_EQ( localArrayView, globalView ); } diff --git a/src/UnitTests/Containers/Segments/CMakeLists.txt b/src/UnitTests/Containers/Segments/CMakeLists.txt new file mode 100644 index 0000000000000000000000000000000000000000..742fb69ef3060451f54bacafdb7fc1ca0d49a64d --- /dev/null +++ b/src/UnitTests/Containers/Segments/CMakeLists.txt @@ -0,0 +1,52 @@ +IF( BUILD_CUDA ) +# CUDA_ADD_EXECUTABLE( SegmentsTest_AdEllpack SegmentsTest_AdEllpack.cu OPTIONS ${CXX_TESTS_FLAGS} ) +# TARGET_LINK_LIBRARIES( SegmentsTest_AdEllpack ${GTEST_BOTH_LIBRARIES} ) + +# CUDA_ADD_EXECUTABLE( SegmentsTest_BiEllpack SegmentsTest_BiEllpack.cu OPTIONS ${CXX_TESTS_FLAGS} ) +# TARGET_LINK_LIBRARIES( SegmentsTest_BiEllpack ${GTEST_BOTH_LIBRARIES} ) + +# CUDA_ADD_EXECUTABLE( SegmentsTest_ChunkedEllpack SegmentsTest_ChunkedEllpack.cu OPTIONS ${CXX_TESTS_FLAGS} ) +# TARGET_LINK_LIBRARIES( SegmentsTest_ChunkedEllpack ${GTEST_BOTH_LIBRARIES} ) + + CUDA_ADD_EXECUTABLE( SegmentsTest_CSR SegmentsTest_CSR.cu OPTIONS ${CXX_TESTS_FLAGS} ) + TARGET_LINK_LIBRARIES( SegmentsTest_CSR ${GTEST_BOTH_LIBRARIES} ) + + CUDA_ADD_EXECUTABLE( SegmentsTest_Ellpack SegmentsTest_Ellpack.cu OPTIONS ${CXX_TESTS_FLAGS} ) + TARGET_LINK_LIBRARIES( SegmentsTest_Ellpack ${GTEST_BOTH_LIBRARIES} ) + + CUDA_ADD_EXECUTABLE( SegmentsTest_SlicedEllpack SegmentsTest_SlicedEllpack.cu OPTIONS ${CXX_TESTS_FLAGS} ) + TARGET_LINK_LIBRARIES( SegmentsTest_SlicedEllpack ${GTEST_BOTH_LIBRARIES} ) + +ELSE( BUILD_CUDA ) +# ADD_EXECUTABLE( SegmentsTest_AdEllpack SegmentsTest_AdEllpack.cpp ) +# TARGET_COMPILE_OPTIONS( SegmentsTest_AdEllpack PRIVATE ${CXX_TESTS_FLAGS} ) +# TARGET_LINK_LIBRARIES( SegmentsTest_AdEllpack ${GTEST_BOTH_LIBRARIES} ) + +# ADD_EXECUTABLE( SegmentsTest_BiEllpack SegmentsTest_BiEllpack.cpp ) +# TARGET_COMPILE_OPTIONS( SegmentsTest_BiEllpack PRIVATE ${CXX_TESTS_FLAGS} ) +# TARGET_LINK_LIBRARIES( SegmentsTest_BiEllpack ${GTEST_BOTH_LIBRARIES} ) + +# ADD_EXECUTABLE( SegmentsTest_ChunkedEllpack SegmentsTest_ChunkedEllpack.cpp ) +# TARGET_COMPILE_OPTIONS( SegmentsTest_ChunkedEllpack PRIVATE ${CXX_TESTS_FLAGS} ) +# TARGET_LINK_LIBRARIES( SegmentsTest_ChunkedEllpack ${GTEST_BOTH_LIBRARIES} ) + + ADD_EXECUTABLE( SegmentsTest_CSR SegmentsTest_CSR.cpp ) + TARGET_COMPILE_OPTIONS( SegmentsTest_CSR PRIVATE ${CXX_TESTS_FLAGS} ) + TARGET_LINK_LIBRARIES( SegmentsTest_CSR ${GTEST_BOTH_LIBRARIES} ) + + ADD_EXECUTABLE( SegmentsTest_Ellpack SegmentsTest_Ellpack.cpp ) + TARGET_COMPILE_OPTIONS( SegmentsTest_Ellpack PRIVATE ${CXX_TESTS_FLAGS} ) + TARGET_LINK_LIBRARIES( SegmentsTest_Ellpack ${GTEST_BOTH_LIBRARIES} ) + + ADD_EXECUTABLE( SegmentsTest_SlicedEllpack SegmentsTest_SlicedEllpack.cpp ) + TARGET_COMPILE_OPTIONS( SegmentsTest_SlicedEllpack PRIVATE ${CXX_TESTS_FLAGS} ) + TARGET_LINK_LIBRARIES( SegmentsTest_SlicedEllpack ${GTEST_BOTH_LIBRARIES} ) +ENDIF( BUILD_CUDA ) + + +#ADD_TEST( SegmentsTest_BiEllpack ${EXECUTABLE_OUTPUT_PATH}/SegmentsTest_BiEllpack${CMAKE_EXECUTABLE_SUFFIX} ) +#ADD_TEST( SegmentsTest_ChunkedEllpack 
${EXECUTABLE_OUTPUT_PATH}/SegmentsTest_ChunkedEllpack${CMAKE_EXECUTABLE_SUFFIX} ) +ADD_TEST( SegmentsTest_CSR ${EXECUTABLE_OUTPUT_PATH}/SegmentsTest_CSR${CMAKE_EXECUTABLE_SUFFIX} ) +ADD_TEST( SegmentsTest_Ellpack ${EXECUTABLE_OUTPUT_PATH}/SegmentsTest_Ellpack${CMAKE_EXECUTABLE_SUFFIX} ) +ADD_TEST( SegmentsTest_SlicedEllpack ${EXECUTABLE_OUTPUT_PATH}/SegmentsTest_SlicedEllpack${CMAKE_EXECUTABLE_SUFFIX} ) + diff --git a/src/UnitTests/Containers/Segments/SegmentsTest.hpp b/src/UnitTests/Containers/Segments/SegmentsTest.hpp new file mode 100644 index 0000000000000000000000000000000000000000..6d4692dbe76785970d75bc7763216ad98f9b7be4 --- /dev/null +++ b/src/UnitTests/Containers/Segments/SegmentsTest.hpp @@ -0,0 +1,161 @@ +/*************************************************************************** + SegmentsTest.hpp - description + ------------------- + begin : Dec 6, 2019 + copyright : (C) 2019 by Tomas Oberhuber et al. + email : tomas.oberhuber@fjfi.cvut.cz + ***************************************************************************/ + +/* See Copyright Notice in tnl/Copyright */ + +#include <TNL/Containers/Vector.h> +#include <TNL/Containers/VectorView.h> +#include <TNL/Math.h> +#include <iostream> + +#ifdef HAVE_GTEST +#include <gtest/gtest.h> + +template< typename Segments > +void test_SetSegmentsSizes_EqualSizes() +{ + using DeviceType = typename Segments::DeviceType; + using IndexType = typename Segments::IndexType; + + const IndexType segmentsCount = 20; + const IndexType segmentSize = 5; + TNL::Containers::Vector< IndexType, DeviceType, IndexType > segmentsSizes( segmentsCount ); + segmentsSizes = segmentSize; + + Segments segments( segmentsSizes ); + + EXPECT_EQ( segments.getSegmentsCount(), segmentsCount ); + EXPECT_EQ( segments.getSize(), segmentsCount * segmentSize ); + EXPECT_LE( segments.getSize(), segments.getStorageSize() ); + + for( IndexType i = 0; i < segmentsCount; i++ ) + EXPECT_EQ( segments.getSegmentSize( i ), segmentSize ); + + Segments segments2( segments ); + EXPECT_EQ( segments2.getSegmentsCount(), segmentsCount ); + EXPECT_EQ( segments2.getSize(), segmentsCount * segmentSize ); + EXPECT_LE( segments2.getSize(), segments2.getStorageSize() ); + for( IndexType i = 0; i < segmentsCount; i++ ) + EXPECT_EQ( segments2.getSegmentSize( i ), segmentSize ); + + Segments segments3; + segments3.setSegmentsSizes( segmentsSizes ); + + EXPECT_EQ( segments3.getSegmentsCount(), segmentsCount ); + EXPECT_EQ( segments3.getSize(), segmentsCount * segmentSize ); + EXPECT_LE( segments3.getSize(), segments3.getStorageSize() ); + + for( IndexType i = 0; i < segmentsCount; i++ ) + EXPECT_EQ( segments3.getSegmentSize( i ), segmentSize ); + + using SegmentsView = typename Segments::ViewType; + + SegmentsView segmentsView = segments.getView(); + EXPECT_EQ( segmentsView.getSegmentsCount(), segmentsCount ); + EXPECT_EQ( segmentsView.getSize(), segmentsCount * segmentSize ); + EXPECT_LE( segmentsView.getSize(), segments.getStorageSize() ); + + for( IndexType i = 0; i < segmentsCount; i++ ) + EXPECT_EQ( segmentsView.getSegmentSize( i ), segmentSize ); +} + +template< typename Segments > +void test_SetSegmentsSizes_EqualSizes_EllpackOnly() +{ + using DeviceType = typename Segments::DeviceType; + using IndexType = typename Segments::IndexType; + + const IndexType segmentsCount = 20; + const IndexType segmentSize = 5; + + Segments segments( segmentsCount, segmentSize ); + + EXPECT_EQ( segments.getSegmentsCount(), segmentsCount ); + EXPECT_EQ( segments.getSize(), segmentsCount * segmentSize ); + 
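// Editor's note (illustrative annotation, not part of the original patch): the
// EXPECT_LE check that follows relies on getStorageSize() being allowed to exceed
// the logical getSize(). Ellpack-style segments typically pad their storage (each
// segment or slice rounded up to a common length for aligned access), while for CSR
// the two values coincide, so "less than or equal" is the assertion that holds
// across all of the segment formats exercised by these tests.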
EXPECT_LE( segments.getSize(), segments.getStorageSize() ); + + for( IndexType i = 0; i < segmentsCount; i++ ) + EXPECT_EQ( segments.getSegmentSize( i ), segmentSize ); + + Segments segments2( segments ); + EXPECT_EQ( segments2.getSegmentsCount(), segmentsCount ); + EXPECT_EQ( segments2.getSize(), segmentsCount * segmentSize ); + EXPECT_LE( segments2.getSize(), segments2.getStorageSize() ); + + for( IndexType i = 0; i < segmentsCount; i++ ) + EXPECT_EQ( segments2.getSegmentSize( i ), segmentSize ); + + Segments segments3; + segments3.setSegmentsSizes( segmentsCount, segmentSize ); + + EXPECT_EQ( segments3.getSegmentsCount(), segmentsCount ); + EXPECT_EQ( segments3.getSize(), segmentsCount * segmentSize ); + EXPECT_LE( segments3.getSize(), segments3.getStorageSize() ); + + for( IndexType i = 0; i < segmentsCount; i++ ) + EXPECT_EQ( segments3.getSegmentSize( i ), segmentSize ); + + using SegmentsView = typename Segments::ViewType; + + SegmentsView segmentsView = segments.getView(); + EXPECT_EQ( segmentsView.getSegmentsCount(), segmentsCount ); + EXPECT_EQ( segmentsView.getSize(), segmentsCount * segmentSize ); + EXPECT_LE( segmentsView.getSize(), segments.getStorageSize() ); + + for( IndexType i = 0; i < segmentsCount; i++ ) + EXPECT_EQ( segmentsView.getSegmentSize( i ), segmentSize ); +} + +template< typename Segments > +void test_AllReduction_MaximumInSegments() +{ + using DeviceType = typename Segments::DeviceType; + using IndexType = typename Segments::IndexType; + + const IndexType segmentsCount = 20; + const IndexType segmentSize = 5; + + TNL::Containers::Vector< IndexType, DeviceType, IndexType > segmentsSizes( segmentsCount ); + segmentsSizes = segmentSize; + + Segments segments( segmentsSizes ); + + TNL::Containers::Vector< IndexType, DeviceType, IndexType > v( segments.getStorageSize() ); + + auto view = v.getView(); + auto init = [=] __cuda_callable__ ( const IndexType segmentIdx, const IndexType localIdx, const IndexType globalIdx ) mutable -> bool { + view[ globalIdx ] = segmentIdx * 5 + localIdx + 1; + return true; + }; + segments.forAll( init ); + + TNL::Containers::Vector< IndexType, DeviceType, IndexType >result( segmentsCount ); + + const auto v_view = v.getConstView(); + auto result_view = result.getView(); + auto fetch = [=] __cuda_callable__ ( IndexType segmentIdx, IndexType localIdx, IndexType globalIdx, bool& compute ) -> IndexType { + return v_view[ globalIdx ]; + }; + auto reduce = [] __cuda_callable__ ( IndexType& a, const IndexType b ) { + a = TNL::max( a, b ); + }; + auto keep = [=] __cuda_callable__ ( const IndexType i, const IndexType a ) mutable { + result_view[ i ] = a; + }; + segments.allReduction( fetch, reduce, keep, std::numeric_limits< IndexType >::min() ); + + for( IndexType i = 0; i < segmentsCount; i++ ) + EXPECT_EQ( result.getElement( i ), ( i + 1 ) * segmentSize ); + + result_view = 0; + segments.getView().allReduction( fetch, reduce, keep, std::numeric_limits< IndexType >::min() ); + for( IndexType i = 0; i < segmentsCount; i++ ) + EXPECT_EQ( result.getElement( i ), ( i + 1 ) * segmentSize ); +} + +#endif diff --git a/src/UnitTests/Containers/Segments/SegmentsTest_CSR.cpp b/src/UnitTests/Containers/Segments/SegmentsTest_CSR.cpp new file mode 100644 index 0000000000000000000000000000000000000000..02edac3325f027e68b867500f714a275517dcbf2 --- /dev/null +++ b/src/UnitTests/Containers/Segments/SegmentsTest_CSR.cpp @@ -0,0 +1 @@ +#include "SegmentsTest_CSR.h" diff --git a/src/UnitTests/Containers/Segments/SegmentsTest_CSR.cu 
b/src/UnitTests/Containers/Segments/SegmentsTest_CSR.cu new file mode 100644 index 0000000000000000000000000000000000000000..02edac3325f027e68b867500f714a275517dcbf2 --- /dev/null +++ b/src/UnitTests/Containers/Segments/SegmentsTest_CSR.cu @@ -0,0 +1 @@ +#include "SegmentsTest_CSR.h" diff --git a/src/UnitTests/Containers/Segments/SegmentsTest_CSR.h b/src/UnitTests/Containers/Segments/SegmentsTest_CSR.h new file mode 100644 index 0000000000000000000000000000000000000000..f2a3a186390e6038f263cd195ef600a7516b0a9e --- /dev/null +++ b/src/UnitTests/Containers/Segments/SegmentsTest_CSR.h @@ -0,0 +1,56 @@ +/*************************************************************************** + SegmentsTest_CSR.h - description + ------------------- + begin : Nov 2, 2018 + copyright : (C) 2018 by Tomas Oberhuber et al. + email : tomas.oberhuber@fjfi.cvut.cz + ***************************************************************************/ + +/* See Copyright Notice in tnl/Copyright */ + +#include <TNL/Containers/Segments/CSR.h> + +#include "SegmentsTest.hpp" +#include <iostream> + +#ifdef HAVE_GTEST +#include <gtest/gtest.h> + +// test fixture for typed tests +template< typename Segments > +class CSRSegmentsTest : public ::testing::Test +{ +protected: + using CSRSegmentsType = Segments; +}; + +// types for which MatrixTest is instantiated +using CSRSegmentsTypes = ::testing::Types +< + TNL::Containers::Segments::CSR< TNL::Devices::Host, int >, + TNL::Containers::Segments::CSR< TNL::Devices::Host, long > +#ifdef HAVE_CUDA + ,TNL::Containers::Segments::CSR< TNL::Devices::Cuda, int >, + TNL::Containers::Segments::CSR< TNL::Devices::Cuda, long > +#endif +>; + +TYPED_TEST_SUITE( CSRSegmentsTest, CSRSegmentsTypes ); + +TYPED_TEST( CSRSegmentsTest, setSegmentsSizes_EqualSizes ) +{ + using CSRSegmentsType = typename TestFixture::CSRSegmentsType; + + test_SetSegmentsSizes_EqualSizes< CSRSegmentsType >(); +} + +TYPED_TEST( CSRSegmentsTest, allReduction_MaximumInSegments ) +{ + using CSRSegmentsType = typename TestFixture::CSRSegmentsType; + + test_AllReduction_MaximumInSegments< CSRSegmentsType >(); +} + +#endif + +#include "../../main.h" diff --git a/src/UnitTests/Containers/Segments/SegmentsTest_Ellpack.cpp b/src/UnitTests/Containers/Segments/SegmentsTest_Ellpack.cpp new file mode 100644 index 0000000000000000000000000000000000000000..120a25103eb1e48d32ed9d9233f6657ba02699a6 --- /dev/null +++ b/src/UnitTests/Containers/Segments/SegmentsTest_Ellpack.cpp @@ -0,0 +1 @@ +#include "SegmentsTest_Ellpack.h" diff --git a/src/UnitTests/Containers/Segments/SegmentsTest_Ellpack.cu b/src/UnitTests/Containers/Segments/SegmentsTest_Ellpack.cu new file mode 100644 index 0000000000000000000000000000000000000000..120a25103eb1e48d32ed9d9233f6657ba02699a6 --- /dev/null +++ b/src/UnitTests/Containers/Segments/SegmentsTest_Ellpack.cu @@ -0,0 +1 @@ +#include "SegmentsTest_Ellpack.h" diff --git a/src/UnitTests/Containers/Segments/SegmentsTest_Ellpack.h b/src/UnitTests/Containers/Segments/SegmentsTest_Ellpack.h new file mode 100644 index 0000000000000000000000000000000000000000..7def8a7329d0845236f213250dbeceecb5775263 --- /dev/null +++ b/src/UnitTests/Containers/Segments/SegmentsTest_Ellpack.h @@ -0,0 +1,63 @@ +/*************************************************************************** + SegmentsTest_Ellpack.h - description + ------------------- + begin : Dec 6, 2019 + copyright : (C) 2019 by Tomas Oberhuber et al. 
+ email : tomas.oberhuber@fjfi.cvut.cz + ***************************************************************************/ + +/* See Copyright Notice in tnl/Copyright */ + +#include <TNL/Containers/Segments/Ellpack.h> + +#include "SegmentsTest.hpp" +#include <iostream> + +#ifdef HAVE_GTEST +#include <gtest/gtest.h> + +// test fixture for typed tests +template< typename Segments > +class EllpackSegmentsTest : public ::testing::Test +{ +protected: + using EllpackSegmentsType = Segments; +}; + +// types for which MatrixTest is instantiated +using EllpackSegmentsTypes = ::testing::Types +< + TNL::Containers::Segments::Ellpack< TNL::Devices::Host, int >, + TNL::Containers::Segments::Ellpack< TNL::Devices::Host, long > +#ifdef HAVE_CUDA + ,TNL::Containers::Segments::Ellpack< TNL::Devices::Cuda, int >, + TNL::Containers::Segments::Ellpack< TNL::Devices::Cuda, long > +#endif +>; + +TYPED_TEST_SUITE( EllpackSegmentsTest, EllpackSegmentsTypes ); + +TYPED_TEST( EllpackSegmentsTest, setSegmentsSizes_EqualSizes ) +{ + using EllpackSegmentsType = typename TestFixture::EllpackSegmentsType; + + test_SetSegmentsSizes_EqualSizes< EllpackSegmentsType >(); +} + +TYPED_TEST( EllpackSegmentsTest, setSegmentsSizes_EqualSizes_EllpackOnly ) +{ + using EllpackSegmentsType = typename TestFixture::EllpackSegmentsType; + + test_SetSegmentsSizes_EqualSizes_EllpackOnly< EllpackSegmentsType >(); +} + +TYPED_TEST( EllpackSegmentsTest, allReduction_MaximumInSegments ) +{ + using EllpackSegmentsType = typename TestFixture::EllpackSegmentsType; + + test_AllReduction_MaximumInSegments< EllpackSegmentsType >(); +} + +#endif + +#include "../../main.h" diff --git a/src/UnitTests/Containers/Segments/SegmentsTest_SlicedEllpack.cpp b/src/UnitTests/Containers/Segments/SegmentsTest_SlicedEllpack.cpp new file mode 100644 index 0000000000000000000000000000000000000000..cd9865f28005d55a1912dcc8f6f7c83844579ec6 --- /dev/null +++ b/src/UnitTests/Containers/Segments/SegmentsTest_SlicedEllpack.cpp @@ -0,0 +1 @@ +#include "SegmentsTest_SlicedEllpack.h" diff --git a/src/UnitTests/Containers/Segments/SegmentsTest_SlicedEllpack.cu b/src/UnitTests/Containers/Segments/SegmentsTest_SlicedEllpack.cu new file mode 100644 index 0000000000000000000000000000000000000000..cd9865f28005d55a1912dcc8f6f7c83844579ec6 --- /dev/null +++ b/src/UnitTests/Containers/Segments/SegmentsTest_SlicedEllpack.cu @@ -0,0 +1 @@ +#include "SegmentsTest_SlicedEllpack.h" diff --git a/src/UnitTests/Containers/Segments/SegmentsTest_SlicedEllpack.h b/src/UnitTests/Containers/Segments/SegmentsTest_SlicedEllpack.h new file mode 100644 index 0000000000000000000000000000000000000000..51131c7dfb0a95df305ca07cf26ef6f7f5350132 --- /dev/null +++ b/src/UnitTests/Containers/Segments/SegmentsTest_SlicedEllpack.h @@ -0,0 +1,56 @@ +/*************************************************************************** + SegmentsTest_SlicedEllpack.h - description + ------------------- + begin : Dec 9, 2019 + copyright : (C) 2019 by Tomas Oberhuber et al. 
+ email : tomas.oberhuber@fjfi.cvut.cz + ***************************************************************************/ + +/* See Copyright Notice in tnl/Copyright */ + +#include <TNL/Containers/Segments/SlicedEllpack.h> + +#include "SegmentsTest.hpp" +#include <iostream> + +#ifdef HAVE_GTEST +#include <gtest/gtest.h> + +// test fixture for typed tests +template< typename Segments > +class SlicedEllpackSegmentsTest : public ::testing::Test +{ +protected: + using SlicedEllpackSegmentsType = Segments; +}; + +// types for which MatrixTest is instantiated +using SlicedEllpackSegmentsTypes = ::testing::Types +< + TNL::Containers::Segments::SlicedEllpack< TNL::Devices::Host, int >, + TNL::Containers::Segments::SlicedEllpack< TNL::Devices::Host, long > +#ifdef HAVE_CUDA + ,TNL::Containers::Segments::SlicedEllpack< TNL::Devices::Cuda, int >, + TNL::Containers::Segments::SlicedEllpack< TNL::Devices::Cuda, long > +#endif +>; + +TYPED_TEST_SUITE( SlicedEllpackSegmentsTest, SlicedEllpackSegmentsTypes ); + +TYPED_TEST( SlicedEllpackSegmentsTest, setSegmentsSizes_EqualSizes ) +{ + using SlicedEllpackSegmentsType = typename TestFixture::SlicedEllpackSegmentsType; + + test_SetSegmentsSizes_EqualSizes< SlicedEllpackSegmentsType >(); +} + +TYPED_TEST( SlicedEllpackSegmentsTest, allReduction_MaximumInSegments ) +{ + using SlicedEllpackSegmentsType = typename TestFixture::SlicedEllpackSegmentsType; + + test_AllReduction_MaximumInSegments< SlicedEllpackSegmentsType >(); +} + +#endif + +#include "../../main.h" diff --git a/src/UnitTests/Matrices/BinarySparseMatrixCopyTest.cpp b/src/UnitTests/Matrices/BinarySparseMatrixCopyTest.cpp new file mode 100644 index 0000000000000000000000000000000000000000..51d7c4ea9425460ff35991bd44a3b9bf6f7e9a95 --- /dev/null +++ b/src/UnitTests/Matrices/BinarySparseMatrixCopyTest.cpp @@ -0,0 +1,11 @@ +/*************************************************************************** + BinarySparseMatrixCopyTest.cpp - description + ------------------- + begin : Feb 5, 2020 + copyright : (C) 2020 by Tomas Oberhuber et al. + email : tomas.oberhuber@fjfi.cvut.cz + ***************************************************************************/ + +/* See Copyright Notice in tnl/Copyright */ + +#include "BinarySparseMatrixCopyTest.h" diff --git a/src/UnitTests/Matrices/BinarySparseMatrixCopyTest.cu b/src/UnitTests/Matrices/BinarySparseMatrixCopyTest.cu new file mode 100644 index 0000000000000000000000000000000000000000..f29db9e968486e16fcf3a9c6f2c8c1e067119344 --- /dev/null +++ b/src/UnitTests/Matrices/BinarySparseMatrixCopyTest.cu @@ -0,0 +1,11 @@ +/*************************************************************************** + BinarySparseMatrixCopyTest.cu - description + ------------------- + begin : Feb 5, 2020 + copyright : (C) 2020 by Tomas Oberhuber et al. + email : tomas.oberhuber@fjfi.cvut.cz + ***************************************************************************/ + +/* See Copyright Notice in tnl/Copyright */ + +#include "BinarySparseMatrixCopyTest.h" diff --git a/src/UnitTests/Matrices/BinarySparseMatrixCopyTest.h b/src/UnitTests/Matrices/BinarySparseMatrixCopyTest.h new file mode 100644 index 0000000000000000000000000000000000000000..b901acbbd93dd7a7416645e70441d1382bd381a3 --- /dev/null +++ b/src/UnitTests/Matrices/BinarySparseMatrixCopyTest.h @@ -0,0 +1,820 @@ +/*************************************************************************** + BinaryBinarySparseMatrixCopyTest.h - description + ------------------- + begin : Feb 5, 2020 + copyright : (C) 2020 by Tomas Oberhuber et al. 
+ email : tomas.oberhuber@fjfi.cvut.cz + ***************************************************************************/ + +/* See Copyright Notice in tnl/Copyright */ + +#include <TNL/Matrices/Legacy/CSR.h> +#include <TNL/Matrices/Legacy/Ellpack.h> +#include <TNL/Matrices/Legacy/SlicedEllpack.h> + +#include <TNL/Matrices/SparseMatrix.h> +#include <TNL/Matrices/MatrixType.h> +#include <TNL/Matrices/Dense.h> +#include <TNL/Matrices/Tridiagonal.h> +#include <TNL/Matrices/Multidiagonal.h> +#include <TNL/Containers/Segments/CSR.h> +#include <TNL/Containers/Segments/Ellpack.h> +#include <TNL/Containers/Segments/SlicedEllpack.h> + +template< typename Device, typename Index, typename IndexAllocator > +using EllpackSegments = TNL::Containers::Segments::Ellpack< Device, Index, IndexAllocator >; + +template< typename Device, typename Index, typename IndexAllocator > +using SlicedEllpackSegments = TNL::Containers::Segments::SlicedEllpack< Device, Index, IndexAllocator >; + +using CSR_host = TNL::Matrices::SparseMatrix< int, TNL::Devices::Host, int, TNL::Matrices::BinaryMatrix, TNL::Containers::Segments::CSR >; +using CSR_cuda = TNL::Matrices::SparseMatrix< int, TNL::Devices::Cuda, int, TNL::Matrices::BinaryMatrix, TNL::Containers::Segments::CSR >; +using E_host = TNL::Matrices::SparseMatrix< int, TNL::Devices::Host, int, TNL::Matrices::BinaryMatrix, EllpackSegments >; +using E_cuda = TNL::Matrices::SparseMatrix< int, TNL::Devices::Cuda, int, TNL::Matrices::BinaryMatrix, EllpackSegments >; +using SE_host = TNL::Matrices::SparseMatrix< int, TNL::Devices::Host, int, TNL::Matrices::BinaryMatrix, SlicedEllpackSegments >; +using SE_cuda = TNL::Matrices::SparseMatrix< int, TNL::Devices::Cuda, int, TNL::Matrices::BinaryMatrix, SlicedEllpackSegments >; + + +#ifdef HAVE_GTEST +#include <gtest/gtest.h> + +/* + * Sets up the following 10x6 sparse matrix: + * + * / 1 1 \ + * | 1 1 1 | + * | 1 1 1 | + * | 1 1 1 1 1 | + * | 1 1 1 1 1 | + * | 1 1 | + * | 1 | + * | 1 | + * | 1 1 1 1 1 | + * \ 1 / + */ +template< typename Matrix > +void setupUnevenRowSizeMatrix( Matrix& m ) +{ + const int rows = 10; + const int cols = 6; + m.setDimensions( rows, cols ); + typename Matrix::CompressedRowLengthsVector rowLengths; + rowLengths.setSize( rows ); + rowLengths.setValue( 5 ); + rowLengths.setElement( 0, 2 ); + rowLengths.setElement( 1, 3 ); + rowLengths.setElement( 2, 3 ); + rowLengths.setElement( 5, 2 ); + rowLengths.setElement( 6, 1 ); + rowLengths.setElement( 7, 1 ); + rowLengths.setElement( 9, 1 ); + m.setCompressedRowLengths( rowLengths ); + + for( int i = 0; i < cols - 4; i++ ) // 0th row + m.setElement( 0, i, 1 ); + + for( int i = 3; i < cols; i++ ) // 1st row + m.setElement( 1, i, 1 ); + + for( int i = 0; i < cols - 3; i++ ) // 2nd row + m.setElement( 2, i, 1 ); + + for( int i = 1; i < cols; i++ ) // 3rd row + m.setElement( 3, i, 1 ); + + for( int i = 0; i < cols - 1; i++ ) // 4th row + m.setElement( 4, i, 1 ); + + for( int i = 0; i < cols - 4; i++ ) // 5th row + m.setElement( 5, i, 1 ); + + m.setElement( 6, 0, 1 ); // 6th row + + m.setElement( 7, 0, 1 ); // 7th row + + for( int i = 0; i < cols - 1; i++ ) // 8th row + m.setElement( 8, i, 1 ); + + m.setElement( 9, 5, 1 ); // 9th row +} + +template< typename Matrix > +void checkUnevenRowSizeMatrix( Matrix& m ) +{ + ASSERT_EQ( m.getRows(), 10 ); + ASSERT_EQ( m.getColumns(), 6 ); + + EXPECT_EQ( m.getElement( 0, 0 ), 1 ); + EXPECT_EQ( m.getElement( 0, 1 ), 1 ); + EXPECT_EQ( m.getElement( 0, 2 ), 0 ); + EXPECT_EQ( m.getElement( 0, 3 ), 0 ); + EXPECT_EQ( m.getElement( 0, 4 ), 0 
); + EXPECT_EQ( m.getElement( 0, 5 ), 0); + + EXPECT_EQ( m.getElement( 1, 0 ), 0 ); + EXPECT_EQ( m.getElement( 1, 1 ), 0 ); + EXPECT_EQ( m.getElement( 1, 2 ), 0 ); + EXPECT_EQ( m.getElement( 1, 3 ), 1 ); + EXPECT_EQ( m.getElement( 1, 4 ), 1 ); + EXPECT_EQ( m.getElement( 1, 5 ), 1 ); + + EXPECT_EQ( m.getElement( 2, 0 ), 1 ); + EXPECT_EQ( m.getElement( 2, 1 ), 1 ); + EXPECT_EQ( m.getElement( 2, 2 ), 1 ); + EXPECT_EQ( m.getElement( 2, 3 ), 0 ); + EXPECT_EQ( m.getElement( 2, 4 ), 0 ); + EXPECT_EQ( m.getElement( 2, 5 ), 0 ); + + EXPECT_EQ( m.getElement( 3, 0 ), 0 ); + EXPECT_EQ( m.getElement( 3, 1 ), 1 ); + EXPECT_EQ( m.getElement( 3, 2 ), 1 ); + EXPECT_EQ( m.getElement( 3, 3 ), 1 ); + EXPECT_EQ( m.getElement( 3, 4 ), 1 ); + EXPECT_EQ( m.getElement( 3, 5 ), 1 ); + + EXPECT_EQ( m.getElement( 4, 0 ), 1 ); + EXPECT_EQ( m.getElement( 4, 1 ), 1 ); + EXPECT_EQ( m.getElement( 4, 2 ), 1 ); + EXPECT_EQ( m.getElement( 4, 3 ), 1 ); + EXPECT_EQ( m.getElement( 4, 4 ), 1 ); + EXPECT_EQ( m.getElement( 4, 5 ), 0 ); + + EXPECT_EQ( m.getElement( 5, 0 ), 1 ); + EXPECT_EQ( m.getElement( 5, 1 ), 1 ); + EXPECT_EQ( m.getElement( 5, 2 ), 0 ); + EXPECT_EQ( m.getElement( 5, 3 ), 0 ); + EXPECT_EQ( m.getElement( 5, 4 ), 0 ); + EXPECT_EQ( m.getElement( 5, 5 ), 0 ); + + EXPECT_EQ( m.getElement( 6, 0 ), 1 ); + EXPECT_EQ( m.getElement( 6, 1 ), 0 ); + EXPECT_EQ( m.getElement( 6, 2 ), 0 ); + EXPECT_EQ( m.getElement( 6, 3 ), 0 ); + EXPECT_EQ( m.getElement( 6, 4 ), 0 ); + EXPECT_EQ( m.getElement( 6, 5 ), 0 ); + + EXPECT_EQ( m.getElement( 7, 0 ), 1 ); + EXPECT_EQ( m.getElement( 7, 1 ), 0 ); + EXPECT_EQ( m.getElement( 7, 2 ), 0 ); + EXPECT_EQ( m.getElement( 7, 3 ), 0 ); + EXPECT_EQ( m.getElement( 7, 4 ), 0 ); + EXPECT_EQ( m.getElement( 7, 5 ), 0 ); + + EXPECT_EQ( m.getElement( 8, 0 ), 1 ); + EXPECT_EQ( m.getElement( 8, 1 ), 1 ); + EXPECT_EQ( m.getElement( 8, 2 ), 1 ); + EXPECT_EQ( m.getElement( 8, 3 ), 1 ); + EXPECT_EQ( m.getElement( 8, 4 ), 1 ); + EXPECT_EQ( m.getElement( 8, 5 ), 0 ); + + EXPECT_EQ( m.getElement( 9, 0 ), 0 ); + EXPECT_EQ( m.getElement( 9, 1 ), 0 ); + EXPECT_EQ( m.getElement( 9, 2 ), 0 ); + EXPECT_EQ( m.getElement( 9, 3 ), 0 ); + EXPECT_EQ( m.getElement( 9, 4 ), 0 ); + EXPECT_EQ( m.getElement( 9, 5 ), 1 ); +} + +/* + * Sets up the following 7x6 sparse matrix: + * + * / 1 1 \ + * | 1 1 1 | + * | 1 1 1 | + * | 1 1 1 | + * | 1 1 1 | + * | 1 1 | + * \ 1 / + */ +template< typename Matrix > +void setupAntiTriDiagMatrix( Matrix& m ) +{ + const int rows = 7; + const int cols = 6; + m.reset(); + m.setDimensions( rows, cols ); + typename Matrix::CompressedRowLengthsVector rowLengths; + rowLengths.setSize( rows ); + rowLengths.setValue( 3 ); + rowLengths.setElement( 0, 4); + rowLengths.setElement( 1, 4 ); + m.setCompressedRowLengths( rowLengths ); + + for( int i = 0; i < rows; i++ ) + for( int j = cols - 1; j > 2; j-- ) + if( j - i + 1 < cols && j - i + 1 >= 0 ) + m.setElement( i, j - i + 1, 1 ); +} + +template< typename Matrix > +void checkAntiTriDiagMatrix( Matrix& m ) +{ + ASSERT_EQ( m.getRows(), 7 ); + ASSERT_EQ( m.getColumns(), 6 ); + + EXPECT_EQ( m.getElement( 0, 0 ), 0 ); + EXPECT_EQ( m.getElement( 0, 1 ), 0 ); + EXPECT_EQ( m.getElement( 0, 2 ), 0 ); + EXPECT_EQ( m.getElement( 0, 3 ), 0 ); + EXPECT_EQ( m.getElement( 0, 4 ), 1 ); + EXPECT_EQ( m.getElement( 0, 5 ), 1); + + EXPECT_EQ( m.getElement( 1, 0 ), 0 ); + EXPECT_EQ( m.getElement( 1, 1 ), 0 ); + EXPECT_EQ( m.getElement( 1, 2 ), 0 ); + EXPECT_EQ( m.getElement( 1, 3 ), 1 ); + EXPECT_EQ( m.getElement( 1, 4 ), 1 ); + EXPECT_EQ( m.getElement( 1, 5 ), 1 ); + + EXPECT_EQ( 
m.getElement( 2, 0 ), 0 ); + EXPECT_EQ( m.getElement( 2, 1 ), 0 ); + EXPECT_EQ( m.getElement( 2, 2 ), 1 ); + EXPECT_EQ( m.getElement( 2, 3 ), 1 ); + EXPECT_EQ( m.getElement( 2, 4 ), 1 ); + EXPECT_EQ( m.getElement( 2, 5 ), 0 ); + + EXPECT_EQ( m.getElement( 3, 0 ), 0 ); + EXPECT_EQ( m.getElement( 3, 1 ), 1 ); + EXPECT_EQ( m.getElement( 3, 2 ), 1 ); + EXPECT_EQ( m.getElement( 3, 3 ), 1 ); + EXPECT_EQ( m.getElement( 3, 4 ), 0 ); + EXPECT_EQ( m.getElement( 3, 5 ), 0 ); + + EXPECT_EQ( m.getElement( 4, 0 ), 1 ); + EXPECT_EQ( m.getElement( 4, 1 ), 1 ); + EXPECT_EQ( m.getElement( 4, 2 ), 1 ); + EXPECT_EQ( m.getElement( 4, 3 ), 0 ); + EXPECT_EQ( m.getElement( 4, 4 ), 0 ); + EXPECT_EQ( m.getElement( 4, 5 ), 0 ); + + EXPECT_EQ( m.getElement( 5, 0 ), 1 ); + EXPECT_EQ( m.getElement( 5, 1 ), 1 ); + EXPECT_EQ( m.getElement( 5, 2 ), 0 ); + EXPECT_EQ( m.getElement( 5, 3 ), 0 ); + EXPECT_EQ( m.getElement( 5, 4 ), 0 ); + EXPECT_EQ( m.getElement( 5, 5 ), 0 ); + + EXPECT_EQ( m.getElement( 6, 0 ), 1 ); + EXPECT_EQ( m.getElement( 6, 1 ), 0 ); + EXPECT_EQ( m.getElement( 6, 2 ), 0 ); + EXPECT_EQ( m.getElement( 6, 3 ), 0 ); + EXPECT_EQ( m.getElement( 6, 4 ), 0 ); + EXPECT_EQ( m.getElement( 6, 5 ), 0 ); +} + +/* + * Sets up the following 7x6 sparse matrix: + * + * / 1 1 \ + * | 1 1 1 | + * | 1 1 1 | + * | 1 1 1 | + * | 1 1 1 | + * | 1 1 | + * \ 1 / + */ +template< typename Matrix > +void setupTriDiagMatrix( Matrix& m ) +{ + const int rows = 7; + const int cols = 6; + m.reset(); + m.setDimensions( rows, cols ); + typename Matrix::CompressedRowLengthsVector rowLengths; + rowLengths.setSize( rows ); + rowLengths.setValue( 3 ); + rowLengths.setElement( 0 , 4 ); + rowLengths.setElement( 1, 4 ); + m.setCompressedRowLengths( rowLengths ); + + for( int i = 0; i < rows; i++ ) + for( int j = 0; j < 3; j++ ) + if( i + j - 1 >= 0 && i + j - 1 < cols ) + m.setElement( i, i + j - 1, 1 ); +} + +template< typename Matrix > +void checkTriDiagMatrix( Matrix& m ) +{ + ASSERT_EQ( m.getRows(), 7 ); + ASSERT_EQ( m.getColumns(), 6 ); + + EXPECT_EQ( m.getElement( 0, 0 ), 1 ); + EXPECT_EQ( m.getElement( 0, 1 ), 1 ); + EXPECT_EQ( m.getElement( 0, 2 ), 0 ); + EXPECT_EQ( m.getElement( 0, 3 ), 0 ); + EXPECT_EQ( m.getElement( 0, 4 ), 0 ); + EXPECT_EQ( m.getElement( 0, 5 ), 0 ); + + EXPECT_EQ( m.getElement( 1, 0 ), 1 ); + EXPECT_EQ( m.getElement( 1, 1 ), 1 ); + EXPECT_EQ( m.getElement( 1, 2 ), 1 ); + EXPECT_EQ( m.getElement( 1, 3 ), 0 ); + EXPECT_EQ( m.getElement( 1, 4 ), 0 ); + EXPECT_EQ( m.getElement( 1, 5 ), 0 ); + + EXPECT_EQ( m.getElement( 2, 0 ), 0 ); + EXPECT_EQ( m.getElement( 2, 1 ), 1 ); + EXPECT_EQ( m.getElement( 2, 2 ), 1 ); + EXPECT_EQ( m.getElement( 2, 3 ), 1 ); + EXPECT_EQ( m.getElement( 2, 4 ), 0 ); + EXPECT_EQ( m.getElement( 2, 5 ), 0 ); + + EXPECT_EQ( m.getElement( 3, 0 ), 0 ); + EXPECT_EQ( m.getElement( 3, 1 ), 0 ); + EXPECT_EQ( m.getElement( 3, 2 ), 1 ); + EXPECT_EQ( m.getElement( 3, 3 ), 1 ); + EXPECT_EQ( m.getElement( 3, 4 ), 1 ); + EXPECT_EQ( m.getElement( 3, 5 ), 0 ); + + EXPECT_EQ( m.getElement( 4, 0 ), 0 ); + EXPECT_EQ( m.getElement( 4, 1 ), 0 ); + EXPECT_EQ( m.getElement( 4, 2 ), 0 ); + EXPECT_EQ( m.getElement( 4, 3 ), 1 ); + EXPECT_EQ( m.getElement( 4, 4 ), 1 ); + EXPECT_EQ( m.getElement( 4, 5 ), 1 ); + + EXPECT_EQ( m.getElement( 5, 0 ), 0 ); + EXPECT_EQ( m.getElement( 5, 1 ), 0 ); + EXPECT_EQ( m.getElement( 5, 2 ), 0 ); + EXPECT_EQ( m.getElement( 5, 3 ), 0 ); + EXPECT_EQ( m.getElement( 5, 4 ), 1 ); + EXPECT_EQ( m.getElement( 5, 5 ), 1 ); + + EXPECT_EQ( m.getElement( 6, 0 ), 0 ); + EXPECT_EQ( m.getElement( 6, 1 ), 0 ); + 
EXPECT_EQ( m.getElement( 6, 2 ), 0 ); + EXPECT_EQ( m.getElement( 6, 3 ), 0 ); + EXPECT_EQ( m.getElement( 6, 4 ), 0 ); + EXPECT_EQ( m.getElement( 6, 5 ), 1 ); +} + +template< typename Matrix1, typename Matrix2 > +void testCopyAssignment() +{ + { + SCOPED_TRACE("Tri Diagonal Matrix"); + + Matrix1 triDiag1; + setupTriDiagMatrix( triDiag1 ); + checkTriDiagMatrix( triDiag1 ); + + Matrix2 triDiag2; + triDiag2 = triDiag1; + checkTriDiagMatrix( triDiag1 ); + checkTriDiagMatrix( triDiag2 ); + } + { + SCOPED_TRACE("Anti Tri Diagonal Matrix"); + Matrix1 antiTriDiag1; + setupAntiTriDiagMatrix( antiTriDiag1 ); + checkAntiTriDiagMatrix( antiTriDiag1 ); + + Matrix2 antiTriDiag2; + antiTriDiag2 = antiTriDiag1; + checkAntiTriDiagMatrix( antiTriDiag2 ); + } + { + SCOPED_TRACE("Uneven Row Size Matrix"); + Matrix1 unevenRowSize1; + setupUnevenRowSizeMatrix( unevenRowSize1 ); + checkUnevenRowSizeMatrix( unevenRowSize1 ); + + Matrix2 unevenRowSize2; + unevenRowSize2 = unevenRowSize1; + + checkUnevenRowSizeMatrix( unevenRowSize2 ); + } +} + +template< typename Matrix1, typename Matrix2 > +void testConversion() +{ + { + SCOPED_TRACE("Tri Diagonal Matrix"); + + Matrix1 triDiag1; + setupTriDiagMatrix( triDiag1 ); + checkTriDiagMatrix( triDiag1 ); + + Matrix2 triDiag2; + triDiag2 = triDiag1; + checkTriDiagMatrix( triDiag2 ); + } + + { + SCOPED_TRACE("Anti Tri Diagonal Matrix"); + + Matrix1 antiTriDiag1; + setupAntiTriDiagMatrix( antiTriDiag1 ); + checkAntiTriDiagMatrix( antiTriDiag1 ); + + Matrix2 antiTriDiag2; + antiTriDiag2 = antiTriDiag1; + checkAntiTriDiagMatrix( antiTriDiag2 ); + } + + { + SCOPED_TRACE("Uneven Row Size Matrix"); + Matrix1 unevenRowSize1; + setupUnevenRowSizeMatrix( unevenRowSize1 ); + checkUnevenRowSizeMatrix( unevenRowSize1 ); + + Matrix2 unevenRowSize2; + unevenRowSize2 = unevenRowSize1; + checkUnevenRowSizeMatrix( unevenRowSize2 ); + } +} + +template< typename Matrix > +void tridiagonalMatrixAssignment() +{ + using RealType = typename Matrix::RealType; + using DeviceType = typename Matrix::DeviceType; + using IndexType = typename Matrix::IndexType; + + using TridiagonalHost = TNL::Matrices::Tridiagonal< RealType, TNL::Devices::Host, IndexType >; + using TridiagonalCuda = TNL::Matrices::Tridiagonal< RealType, TNL::Devices::Cuda, IndexType >; + + const IndexType rows( 10 ), columns( 10 ); + TridiagonalHost hostMatrix( rows, columns ); + for( IndexType i = 0; i < rows; i++ ) + for( IndexType j = TNL::max( 0, i - 1 ); j < TNL::min( columns, i + 2 ); j++ ) + hostMatrix.setElement( i, j, TNL::min( i + j, 1 ) ); + + Matrix matrix; + matrix = hostMatrix; + using RowCapacitiesType = typename Matrix::RowsCapacitiesType; + RowCapacitiesType rowCapacities; + matrix.getCompressedRowLengths( rowCapacities ); + RowCapacitiesType exactRowLengths{ 1, 3, 3, 3, 3, 3, 3, 3, 3, 2 }; + + EXPECT_EQ( rowCapacities, exactRowLengths ); + for( IndexType i = 0; i < rows; i++ ) + for( IndexType j = 0; j < columns; j++ ) + { + if( abs( i - j ) > 1 ) + EXPECT_EQ( matrix.getElement( i, j ), 0.0 ); + else + EXPECT_EQ( matrix.getElement( i, j ), TNL::min( i + j, 1 ) ); + } + +#ifdef HAVE_CUDA + TridiagonalCuda cudaMatrix( rows, columns ); + cudaMatrix = hostMatrix; + matrix = cudaMatrix; + matrix.getCompressedRowLengths( rowCapacities ); + EXPECT_EQ( rowCapacities, exactRowLengths ); + for( IndexType i = 0; i < rows; i++ ) + for( IndexType j = 0; j < columns; j++ ) + { + if( abs( i - j ) > 1 ) + EXPECT_EQ( matrix.getElement( i, j ), 0.0 ); + else + EXPECT_EQ( matrix.getElement( i, j ), TNL::min( i + j, 1 ) ); + } +#endif +} 
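// Editor's note (illustrative annotation, not part of the original patch): the
// expected row capacities { 1, 3, 3, 3, 3, 3, 3, 3, 3, 2 } in tridiagonalMatrixAssignment
// above follow from the host tridiagonal setup combined with the binary sparse target
// keeping only nonzero entries. Row i is filled on columns max(0, i-1) .. min(columns, i+2) - 1
// with the value TNL::min( i + j, 1 ), so for example:
//   row 0: columns 0,1    -> values 0,1    -> 1 nonzero (the explicit zero at (0,0) is dropped)
//   row 1: columns 0,1,2  -> values 1,1,1  -> 3 nonzeros
//   row 9: columns 8,9    -> values 1,1    -> 2 nonzeros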
+ +template< typename Matrix > +void multidiagonalMatrixAssignment() +{ + using RealType = typename Matrix::RealType; + using DeviceType = typename Matrix::DeviceType; + using IndexType = typename Matrix::IndexType; + + using MultidiagonalHost = TNL::Matrices::Multidiagonal< RealType, TNL::Devices::Host, IndexType >; + using MultidiagonalCuda = TNL::Matrices::Multidiagonal< RealType, TNL::Devices::Cuda, IndexType >; + using DiagonalsShiftsType = typename MultidiagonalHost::DiagonalsShiftsType; + DiagonalsShiftsType diagonals{ -4, -2, 0, 1, 3, 5 }; + + const IndexType rows( 10 ), columns( 10 ); + MultidiagonalHost hostMatrix( rows, columns, diagonals ); + for( IndexType i = 0; i < rows; i++ ) + for( IndexType j = 0; j < columns; j++ ) + if( diagonals.containsValue( j - i ) ) + hostMatrix.setElement( i, j, TNL::min( i + j, 1 ) ); + + Matrix matrix; + matrix = hostMatrix; + using RowCapacitiesType = typename Matrix::RowsCapacitiesType; + RowCapacitiesType rowCapacities; + matrix.getCompressedRowLengths( rowCapacities ); + RowCapacitiesType exactRowLengths{ 3, 4, 5, 5, 6, 5, 5, 4, 4, 3 }; + /*std::cerr << "hostMatrix " << hostMatrix << std::endl; + std::cerr << "matrix " << matrix << std::endl; + std::cerr << "rowCapacities " << rowCapacities << std::endl;*/ + + EXPECT_EQ( rowCapacities, exactRowLengths ); + for( IndexType i = 0; i < rows; i++ ) + for( IndexType j = 0; j < columns; j++ ) + { + if( diagonals.containsValue( j - i ) ) + EXPECT_EQ( matrix.getElement( i, j ), TNL::min( i + j, 1 ) ); + else + EXPECT_EQ( matrix.getElement( i, j ), 0.0 ); + } + +#ifdef HAVE_CUDA + MultidiagonalCuda cudaMatrix( rows, columns, diagonals ); + cudaMatrix = hostMatrix; + matrix = cudaMatrix; + matrix.getCompressedRowLengths( rowCapacities ); + EXPECT_EQ( rowCapacities, exactRowLengths ); + for( IndexType i = 0; i < rows; i++ ) + for( IndexType j = 0; j < columns; j++ ) + { + if( diagonals.containsValue( j - i ) ) + EXPECT_EQ( matrix.getElement( i, j ), TNL::min( i + j, 1 ) ); + else + EXPECT_EQ( matrix.getElement( i, j ), 0.0 ); + } +#endif +} + +template< typename Matrix > +void denseMatrixAssignment() +{ + using RealType = typename Matrix::RealType; + using DeviceType = typename Matrix::DeviceType; + using IndexType = typename Matrix::IndexType; + + using DenseHost = TNL::Matrices::Dense< RealType, TNL::Devices::Host, IndexType >; + using DenseCuda = TNL::Matrices::Dense< RealType, TNL::Devices::Cuda, IndexType >; + + const IndexType rows( 10 ), columns( 10 ); + DenseHost hostMatrix( rows, columns ); + for( IndexType i = 0; i < columns; i++ ) + for( IndexType j = 0; j <= i; j++ ) + hostMatrix( i, j ) = TNL::min( i + j, 1 ); + + Matrix matrix; + matrix = hostMatrix; + using RowCapacitiesType = typename Matrix::RowsCapacitiesType; + RowCapacitiesType rowCapacities; + matrix.getCompressedRowLengths( rowCapacities ); + RowCapacitiesType exactRowLengths{ 0, 2, 3, 4, 5, 6, 7, 8, 9, 10 }; + EXPECT_EQ( rowCapacities, exactRowLengths ); + for( IndexType i = 0; i < columns; i++ ) + for( IndexType j = 0; j < rows; j++ ) + { + if( j > i ) + EXPECT_EQ( matrix.getElement( i, j ), 0.0 ); + else + EXPECT_EQ( matrix.getElement( i, j ), TNL::min( i + j, 1 ) ); + } + +#ifdef HAVE_CUDA + DenseCuda cudaMatrix( rows, columns ); + cudaMatrix = hostMatrix; + matrix = cudaMatrix; + matrix.getCompressedRowLengths( rowCapacities ); + EXPECT_EQ( rowCapacities, exactRowLengths ); + for( IndexType i = 0; i < columns; i++ ) + for( IndexType j = 0; j < rows; j++ ) + { + if( j > i ) + EXPECT_EQ( matrix.getElement( i, j ), 0.0 ); + else 
+ EXPECT_EQ( matrix.getElement( i, j ), TNL::min( i + j, 1 ) ); + } +#endif +} + +TEST( BinarySparseMatrixCopyTest, CSR_HostToHost ) +{ + testCopyAssignment< CSR_host, CSR_host >(); +} + +#ifdef HAVE_CUDA +TEST( BinarySparseMatrixCopyTest, CSR_HostToCuda ) +{ + testCopyAssignment< CSR_host, CSR_cuda >(); +} + +TEST( BinarySparseMatrixCopyTest, CSR_CudaToHost ) +{ + testCopyAssignment< CSR_cuda, CSR_host >(); +} + +TEST( BinarySparseMatrixCopyTest, CSR_CudaToCuda ) +{ + testCopyAssignment< CSR_cuda, CSR_cuda >(); +} +#endif + + +TEST( BinarySparseMatrixCopyTest, Ellpack_HostToHost ) +{ + testCopyAssignment< E_host, E_host >(); +} + +#ifdef HAVE_CUDA +TEST( BinarySparseMatrixCopyTest, Ellpack_HostToCuda ) +{ + testCopyAssignment< E_host, E_cuda >(); +} + +TEST( BinarySparseMatrixCopyTest, Ellpack_CudaToHost ) +{ + testCopyAssignment< E_cuda, E_host >(); +} + +TEST( BinarySparseMatrixCopyTest, Ellpack_CudaToCuda ) +{ + testCopyAssignment< E_cuda, E_cuda >(); +} +#endif + + +TEST( BinarySparseMatrixCopyTest, SlicedEllpack_HostToHost ) +{ + testCopyAssignment< SE_host, SE_host >(); +} + +#ifdef HAVE_CUDA +TEST( BinarySparseMatrixCopyTest, SlicedEllpack_HostToCuda ) +{ + testCopyAssignment< SE_host, SE_cuda >(); +} + +TEST( BinarySparseMatrixCopyTest, SlicedEllpack_CudaToHost ) +{ + testCopyAssignment< SE_cuda, SE_host >(); +} + +TEST( BinarySparseMatrixCopyTest, SlicedEllpack_CudaToCuda ) +{ + testCopyAssignment< SE_cuda, SE_cuda >(); +} +#endif + +//// +// Test of conversion between formats +TEST( BinarySparseMatrixCopyTest, CSR_to_Ellpack_host ) +{ + testConversion< CSR_host, E_host >(); +} + +TEST( BinarySparseMatrixCopyTest, Ellpack_to_CSR_host ) +{ + testConversion< E_host, CSR_host >(); +} + +TEST( BinarySparseMatrixCopyTest, CSR_to_SlicedEllpack_host ) +{ + testConversion< CSR_host, SE_host >(); +} + +TEST( BinarySparseMatrixCopyTest, SlicedEllpack_to_CSR_host ) +{ + testConversion< SE_host, CSR_host >(); +} + +TEST( BinarySparseMatrixCopyTest, Ellpack_to_SlicedEllpack_host ) +{ + testConversion< E_host, SE_host >(); +} + +TEST( BinarySparseMatrixCopyTest, SlicedEllpack_to_Ellpack_host ) +{ + testConversion< SE_host, E_host >(); +} + +#ifdef HAVE_CUDA +TEST( BinarySparseMatrixCopyTest, CSR_to_Ellpack_cuda ) +{ + testConversion< CSR_cuda, E_cuda >(); +} + +TEST( BinarySparseMatrixCopyTest, Ellpack_to_CSR_cuda ) +{ + testConversion< E_cuda, CSR_cuda >(); +} + +TEST( BinarySparseMatrixCopyTest, CSR_to_SlicedEllpack_cuda ) +{ + testConversion< CSR_cuda, SE_cuda >(); +} + +TEST( BinarySparseMatrixCopyTest, SlicedEllpack_to_CSR_cuda ) +{ + testConversion< SE_cuda, CSR_cuda >(); +} + +TEST( BinarySparseMatrixCopyTest, Ellpack_to_SlicedEllpack_cuda ) +{ + testConversion< E_cuda, SE_cuda >(); +} + +TEST( BinarySparseMatrixCopyTest, SlicedEllpack_to_Ellpack_cuda ) +{ + testConversion< SE_cuda, E_cuda >(); +} +#endif + +//// +// Tridiagonal matrix assignment test +TEST( BinarySparseMatrixCopyTest, TridiagonalMatrixAssignment_to_CSR_host ) +{ + tridiagonalMatrixAssignment< CSR_host >(); +} + +TEST( BinarySparseMatrixCopyTest, TridiagonalMatrixAssignment_to_Ellpack_host ) +{ + tridiagonalMatrixAssignment< E_host >(); +} + +TEST( BinarySparseMatrixCopyTest, TridiagonalMatrixAssignment_to_SlicedEllpack_host ) +{ + tridiagonalMatrixAssignment< SE_host >(); +} + +#ifdef HAVE_CUDA +TEST( BinarySparseMatrixCopyTest, TridiagonalMatrixAssignment_to_CSR_cuda ) +{ + tridiagonalMatrixAssignment< CSR_cuda >(); +} + +TEST( BinarySparseMatrixCopyTest, TridiagonalMatrixAssignment_to_Ellpack_cuda ) +{ + 
tridiagonalMatrixAssignment< E_cuda >(); +} + +TEST( BinarySparseMatrixCopyTest, TridiagonalMatrixAssignment_to_SlicedEllpack_cuda ) +{ + tridiagonalMatrixAssignment< SE_cuda >(); +} +#endif // HAVE_CUDA + +//// +// Multidiagonal matrix assignment test +TEST( BinarySparseMatrixCopyTest, MultidiagonalMatrixAssignment_to_CSR_host ) +{ + multidiagonalMatrixAssignment< CSR_host >(); +} + +TEST( BinarySparseMatrixCopyTest, MultidiagonalMatrixAssignment_to_Ellpack_host ) +{ + multidiagonalMatrixAssignment< E_host >(); +} + +TEST( BinarySparseMatrixCopyTest, MultidiagonalMatrixAssignment_to_SlicedEllpack_host ) +{ + multidiagonalMatrixAssignment< SE_host >(); +} + +#ifdef HAVE_CUDA +TEST( BinarySparseMatrixCopyTest, MultidiagonalMatrixAssignment_to_CSR_cuda ) +{ + multidiagonalMatrixAssignment< CSR_cuda >(); +} + +TEST( BinarySparseMatrixCopyTest, MultidiagonalMatrixAssignment_to_Ellpack_cuda ) +{ + multidiagonalMatrixAssignment< E_cuda >(); +} + +TEST( BinarySparseMatrixCopyTest, MultidiagonalMatrixAssignment_to_SlicedEllpack_cuda ) +{ + multidiagonalMatrixAssignment< SE_cuda >(); +} +#endif // HAVE_CUDA + +//// +// Dense matrix assignment test +TEST( BinarySparseMatrixCopyTest, DenseMatrixAssignment_to_CSR_host ) +{ + denseMatrixAssignment< CSR_host >(); +} + +TEST( BinarySparseMatrixCopyTest, DenseMatrixAssignment_to_Ellpack_host ) +{ + denseMatrixAssignment< E_host >(); +} + +TEST( BinarySparseMatrixCopyTest, DenseMatrixAssignment_to_SlicedEllpack_host ) +{ + denseMatrixAssignment< SE_host >(); +} + +#ifdef HAVE_CUDA +TEST( BinarySparseMatrixCopyTest, DenseMatrixAssignment_to_CSR_cuda ) +{ + denseMatrixAssignment< CSR_cuda >(); +} + +TEST( BinarySparseMatrixCopyTest, DenseMatrixAssignment_to_Ellpack_cuda ) +{ + denseMatrixAssignment< E_cuda >(); +} + +TEST( BinarySparseMatrixCopyTest, DenseMatrixAssignment_to_SlicedEllpack_cuda ) +{ + denseMatrixAssignment< SE_cuda >(); +} +#endif // HAVE_CUDA + +#endif //HAVE_GTEST + +#include "../main.h" diff --git a/src/UnitTests/Matrices/BinarySparseMatrixTest.hpp b/src/UnitTests/Matrices/BinarySparseMatrixTest.hpp new file mode 100644 index 0000000000000000000000000000000000000000..276c432ff349321beb642c177cb3ef6cd282059a --- /dev/null +++ b/src/UnitTests/Matrices/BinarySparseMatrixTest.hpp @@ -0,0 +1,1203 @@ +/*************************************************************************** + SparseMatrixTest_impl.h - description + ------------------- + begin : Nov 22, 2018 + copyright : (C) 2018 by Tomas Oberhuber et al. + email : tomas.oberhuber@fjfi.cvut.cz + ***************************************************************************/ + +/* See Copyright Notice in tnl/Copyright */ + +#include <TNL/Containers/Vector.h> +#include <TNL/Containers/VectorView.h> +#include <TNL/Math.h> +#include <TNL/Algorithms/ParallelFor.h> +#include <iostream> +#include <sstream> + +#ifdef HAVE_GTEST +#include <gtest/gtest.h> + +template< typename MatrixHostFloat, typename MatrixHostInt > +void host_test_GetType() +{ + bool testRan = false; + EXPECT_TRUE( testRan ); + std::cout << "\nTEST DID NOT RUN. NOT WORKING.\n\n"; + std::cerr << "This test has not been implemented properly yet.\n" << std::endl; +} + +template< typename MatrixCudaFloat, typename MatrixCudaInt > +void cuda_test_GetType() +{ + bool testRan = false; + EXPECT_TRUE( testRan ); + std::cout << "\nTEST DID NOT RUN. 
NOT WORKING.\n\n"; + std::cerr << "This test has not been implemented properly yet.\n" << std::endl; +} + +template< typename Matrix > +void test_SetDimensions() +{ + using RealType = typename Matrix::RealType; + using DeviceType = typename Matrix::DeviceType; + using IndexType = typename Matrix::IndexType; + + const IndexType rows = 9; + const IndexType cols = 8; + + Matrix m; + m.setDimensions( rows, cols ); + + EXPECT_EQ( m.getRows(), 9 ); + EXPECT_EQ( m.getColumns(), 8 ); + + Matrix m2( rows, cols ); + EXPECT_EQ( m2.getRows(), 9 ); + EXPECT_EQ( m2.getColumns(), 8 ); +} + +template< typename Matrix > +void test_SetCompressedRowLengths() +{ + using RealType = typename Matrix::RealType; + using DeviceType = typename Matrix::DeviceType; + using IndexType = typename Matrix::IndexType; + + const IndexType rows = 10; + const IndexType cols = 11; + + Matrix m( rows, cols ); + typename Matrix::CompressedRowLengthsVector rowLengths; + rowLengths.setSize( rows ); + rowLengths.setValue( 3 ); + + IndexType rowLength = 1; + for( IndexType i = 2; i < rows; i++ ) + rowLengths.setElement( i, rowLength++ ); + + m.setCompressedRowLengths( rowLengths ); + + // Insert values into the rows. + for( IndexType i = 0; i < 3; i++ ) // 0th row + m.setElement( 0, i, 1 ); + + for( IndexType i = 0; i < 3; i++ ) // 1st row + m.setElement( 1, i, 1 ); + + for( IndexType i = 0; i < 1; i++ ) // 2nd row + m.setElement( 2, i, 1 ); + + for( IndexType i = 0; i < 2; i++ ) // 3rd row + m.setElement( 3, i, 1 ); + + for( IndexType i = 0; i < 3; i++ ) // 4th row + m.setElement( 4, i, 1 ); + + for( IndexType i = 0; i < 4; i++ ) // 5th row + m.setElement( 5, i, 1 ); + + for( IndexType i = 0; i < 5; i++ ) // 6th row + m.setElement( 6, i, 1 ); + + for( IndexType i = 0; i < 6; i++ ) // 7th row + m.setElement( 7, i, 1 ); + + for( IndexType i = 0; i < 7; i++ ) // 8th row + m.setElement( 8, i, 1 ); + + for( IndexType i = 0; i < 8; i++ ) // 9th row + m.setElement( 9, i, 1 ); + + rowLengths = 0; + m.getCompressedRowLengths( rowLengths ); + typename Matrix::CompressedRowLengthsVector correctRowLengths{ 3, 3, 1, 2, 3, 4, 5, 6, 7, 8 }; + EXPECT_EQ( rowLengths, correctRowLengths ); +} + +template< typename Matrix1, typename Matrix2 > +void test_SetLike() +{ + using RealType = typename Matrix1::RealType; + using DeviceType = typename Matrix1::DeviceType; + using IndexType = typename Matrix1::IndexType; + + const IndexType rows = 8; + const IndexType cols = 7; + + Matrix1 m1( rows + 1, cols + 2 ); + Matrix2 m2( rows, cols ); + + m1.setLike( m2 ); + + EXPECT_EQ( m1.getRows(), m2.getRows() ); + EXPECT_EQ( m1.getColumns(), m2.getColumns() ); +} + +template< typename Matrix > +void test_GetNumberOfNonzeroMatrixElements() +{ + using RealType = typename Matrix::RealType; + using DeviceType = typename Matrix::DeviceType; + using IndexType = typename Matrix::IndexType; + + /* + * Sets up the following 10x10 sparse matrix: + * + * / 1 0 1 0 1 0 1 0 0 0 \ + * | 1 1 1 0 0 0 0 0 0 0 | + * | 1 1 1 1 1 1 1 1 0 0 | + * | 1 1 0 0 0 0 0 0 0 0 | + * | 1 0 0 0 0 0 0 0 0 0 | + * | 1 0 0 0 0 0 0 0 0 0 | + * | 1 0 0 0 0 0 0 0 0 0 | + * | 1 0 0 0 0 0 0 0 0 0 | + * | 1 1 1 1 1 1 1 1 1 1 | + * \ 1 1 1 1 1 1 1 1 1 1 / + */ + + const IndexType rows = 10; + const IndexType cols = 10; + + Matrix m( rows, cols ); + + typename Matrix::CompressedRowLengthsVector rowLengths; + rowLengths.setSize( rows ); + rowLengths.setElement( 0, 1 ); + rowLengths.setElement( 1, 1 ); + rowLengths.setElement( 2, 1 ); + rowLengths.setElement( 3, 1 ); + for( IndexType i = 4; i < rows - 2; i++ ) 
+ rowLengths.setElement( i, 1 ); + + rowLengths.setElement( 8, 1 ); + rowLengths.setElement( 9, 1 ); + m.setCompressedRowLengths( rowLengths ); + + for( IndexType i = 0; i < 4; i++ ) + m.setElement( 0, 2 * i, 1 ); + + for( IndexType i = 0; i < 3; i++ ) + m.setElement( 1, i, 1 ); + + for( IndexType i = 0; i < 8; i++ ) + m.setElement( 2, i, 1 ); + + for( IndexType i = 0; i < 2; i++ ) + m.setElement( 3, i, 1 ); + + for( IndexType i = 4; i < 8; i++ ) + m.setElement( i, 0, 1 ); + + for( IndexType j = 8; j < rows; j++) + for( IndexType i = 0; i < cols; i++ ) + m.setElement( j, i, 1 ); + + EXPECT_EQ( m.getNumberOfNonzeroMatrixElements(), 41 ); +} + +template< typename Matrix > +void test_Reset() +{ + using RealType = typename Matrix::RealType; + using DeviceType = typename Matrix::DeviceType; + using IndexType = typename Matrix::IndexType; + + /* + * Sets up the following 5x4 sparse matrix: + * + * / 0 0 0 0 \ + * | 0 0 0 0 | + * | 0 0 0 0 | + * | 0 0 0 0 | + * \ 0 0 0 0 / + */ + + const IndexType rows = 5; + const IndexType cols = 4; + + Matrix m( rows, cols ); + m.reset(); + + EXPECT_EQ( m.getRows(), 0 ); + EXPECT_EQ( m.getColumns(), 0 ); +} + +template< typename Matrix > +void test_GetRow() +{ + using RealType = typename Matrix::RealType; + using DeviceType = typename Matrix::DeviceType; + using IndexType = typename Matrix::IndexType; + + /* + * Sets up the following 10x10 sparse matrix: + * + * / 1 0 1 0 1 0 1 0 0 0 \ + * | 1 1 1 0 0 0 0 0 0 0 | + * | 1 1 1 1 1 1 1 1 0 0 | + * | 1 1 0 0 0 0 0 0 0 0 | + * | 1 0 0 0 0 0 0 0 0 0 | + * | 1 0 0 0 0 0 0 0 0 0 | + * | 1 0 0 0 0 0 0 0 0 0 | + * | 1 0 0 0 0 0 0 0 0 0 | + * | 1 1 1 1 1 1 1 1 1 1 | + * \ 1 1 1 1 1 1 1 1 1 1 / + */ + + const IndexType rows = 10; + const IndexType cols = 10; + + Matrix m( rows, cols ); + + typename Matrix::CompressedRowLengthsVector rowLengths; + rowLengths.setSize( rows ); + rowLengths.setElement( 0, 4 ); + rowLengths.setElement( 1, 3 ); + rowLengths.setElement( 2, 8 ); + rowLengths.setElement( 3, 2 ); + for( IndexType i = 4; i < rows - 2; i++ ) + rowLengths.setElement( i, 1 ); + + rowLengths.setElement( 8, 10 ); + rowLengths.setElement( 9, 10 ); + m.setCompressedRowLengths( rowLengths ); + + auto matrixView = m.getView(); + auto f = [=] __cuda_callable__ ( const IndexType rowIdx ) mutable { + auto row = matrixView.getRow( rowIdx ); + switch( rowIdx ) + { + case 0: + for( IndexType i = 0; i < 4; i++ ) + row.setElement( i, 2 * i, 1 ); + break; + case 1: + for( IndexType i = 0; i < 3; i++ ) + row.setElement( i, i, 1 ); + break; + case 2: + for( IndexType i = 0; i < 8; i++ ) + row.setElement( i, i, 1 ); + break; + case 3: + for( IndexType i = 0; i < 2; i++ ) + row.setElement( i, i, 1 ); + break; + case 4: + row.setElement( 0, 0, 1 ); + break; + case 5: + row.setElement( 0, 0, 1 ); + break; + case 6: + row.setElement( 0, 0, 1 ); + break; + case 7: + row.setElement( 0, 0, 1 ); + break; + case 8: + for( IndexType i = 0; i < rows; i++ ) + row.setElement( i, i, 1 ); + break; + case 9: + for( IndexType i = 0; i < rows; i++ ) + row.setElement( i, i, 1 ); + break; + } + }; + TNL::Algorithms::ParallelFor< DeviceType >::exec( ( IndexType ) 0, rows, f ); + + EXPECT_EQ( m.getElement( 0, 0 ), 1 ); + EXPECT_EQ( m.getElement( 0, 1 ), 0 ); + EXPECT_EQ( m.getElement( 0, 2 ), 1 ); + EXPECT_EQ( m.getElement( 0, 3 ), 0 ); + EXPECT_EQ( m.getElement( 0, 4 ), 1 ); + EXPECT_EQ( m.getElement( 0, 5 ), 0 ); + EXPECT_EQ( m.getElement( 0, 6 ), 1 ); + EXPECT_EQ( m.getElement( 0, 7 ), 0 ); + EXPECT_EQ( m.getElement( 0, 8 ), 0 ); + EXPECT_EQ( 
m.getElement( 0, 9 ), 0 ); + + EXPECT_EQ( m.getElement( 1, 0 ), 1 ); + EXPECT_EQ( m.getElement( 1, 1 ), 1 ); + EXPECT_EQ( m.getElement( 1, 2 ), 1 ); + EXPECT_EQ( m.getElement( 1, 3 ), 0 ); + EXPECT_EQ( m.getElement( 1, 4 ), 0 ); + EXPECT_EQ( m.getElement( 1, 5 ), 0 ); + EXPECT_EQ( m.getElement( 1, 6 ), 0 ); + EXPECT_EQ( m.getElement( 1, 7 ), 0 ); + EXPECT_EQ( m.getElement( 1, 8 ), 0 ); + EXPECT_EQ( m.getElement( 1, 9 ), 0 ); + + EXPECT_EQ( m.getElement( 2, 0 ), 1 ); + EXPECT_EQ( m.getElement( 2, 1 ), 1 ); + EXPECT_EQ( m.getElement( 2, 2 ), 1 ); + EXPECT_EQ( m.getElement( 2, 3 ), 1 ); + EXPECT_EQ( m.getElement( 2, 4 ), 1 ); + EXPECT_EQ( m.getElement( 2, 5 ), 1 ); + EXPECT_EQ( m.getElement( 2, 6 ), 1 ); + EXPECT_EQ( m.getElement( 2, 7 ), 1 ); + EXPECT_EQ( m.getElement( 2, 8 ), 0 ); + EXPECT_EQ( m.getElement( 2, 9 ), 0 ); + + EXPECT_EQ( m.getElement( 3, 0 ), 1 ); + EXPECT_EQ( m.getElement( 3, 1 ), 1 ); + EXPECT_EQ( m.getElement( 3, 2 ), 0 ); + EXPECT_EQ( m.getElement( 3, 3 ), 0 ); + EXPECT_EQ( m.getElement( 3, 4 ), 0 ); + EXPECT_EQ( m.getElement( 3, 5 ), 0 ); + EXPECT_EQ( m.getElement( 3, 6 ), 0 ); + EXPECT_EQ( m.getElement( 3, 7 ), 0 ); + EXPECT_EQ( m.getElement( 3, 8 ), 0 ); + EXPECT_EQ( m.getElement( 3, 9 ), 0 ); + + EXPECT_EQ( m.getElement( 4, 0 ), 1 ); + EXPECT_EQ( m.getElement( 4, 1 ), 0 ); + EXPECT_EQ( m.getElement( 4, 2 ), 0 ); + EXPECT_EQ( m.getElement( 4, 3 ), 0 ); + EXPECT_EQ( m.getElement( 4, 4 ), 0 ); + EXPECT_EQ( m.getElement( 4, 5 ), 0 ); + EXPECT_EQ( m.getElement( 4, 6 ), 0 ); + EXPECT_EQ( m.getElement( 4, 7 ), 0 ); + EXPECT_EQ( m.getElement( 4, 8 ), 0 ); + EXPECT_EQ( m.getElement( 4, 9 ), 0 ); + + EXPECT_EQ( m.getElement( 5, 0 ), 1 ); + EXPECT_EQ( m.getElement( 5, 1 ), 0 ); + EXPECT_EQ( m.getElement( 5, 2 ), 0 ); + EXPECT_EQ( m.getElement( 5, 3 ), 0 ); + EXPECT_EQ( m.getElement( 5, 4 ), 0 ); + EXPECT_EQ( m.getElement( 5, 5 ), 0 ); + EXPECT_EQ( m.getElement( 5, 6 ), 0 ); + EXPECT_EQ( m.getElement( 5, 7 ), 0 ); + EXPECT_EQ( m.getElement( 5, 8 ), 0 ); + EXPECT_EQ( m.getElement( 5, 9 ), 0 ); + + EXPECT_EQ( m.getElement( 6, 0 ), 1 ); + EXPECT_EQ( m.getElement( 6, 1 ), 0 ); + EXPECT_EQ( m.getElement( 6, 2 ), 0 ); + EXPECT_EQ( m.getElement( 6, 3 ), 0 ); + EXPECT_EQ( m.getElement( 6, 4 ), 0 ); + EXPECT_EQ( m.getElement( 6, 5 ), 0 ); + EXPECT_EQ( m.getElement( 6, 6 ), 0 ); + EXPECT_EQ( m.getElement( 6, 7 ), 0 ); + EXPECT_EQ( m.getElement( 6, 8 ), 0 ); + EXPECT_EQ( m.getElement( 6, 9 ), 0 ); + + EXPECT_EQ( m.getElement( 7, 0 ), 1 ); + EXPECT_EQ( m.getElement( 7, 1 ), 0 ); + EXPECT_EQ( m.getElement( 7, 2 ), 0 ); + EXPECT_EQ( m.getElement( 7, 3 ), 0 ); + EXPECT_EQ( m.getElement( 7, 4 ), 0 ); + EXPECT_EQ( m.getElement( 7, 5 ), 0 ); + EXPECT_EQ( m.getElement( 7, 6 ), 0 ); + EXPECT_EQ( m.getElement( 7, 7 ), 0 ); + EXPECT_EQ( m.getElement( 7, 8 ), 0 ); + EXPECT_EQ( m.getElement( 7, 9 ), 0 ); + + EXPECT_EQ( m.getElement( 8, 0 ), 1 ); + EXPECT_EQ( m.getElement( 8, 1 ), 1 ); + EXPECT_EQ( m.getElement( 8, 2 ), 1 ); + EXPECT_EQ( m.getElement( 8, 3 ), 1 ); + EXPECT_EQ( m.getElement( 8, 4 ), 1 ); + EXPECT_EQ( m.getElement( 8, 5 ), 1 ); + EXPECT_EQ( m.getElement( 8, 6 ), 1 ); + EXPECT_EQ( m.getElement( 8, 7 ), 1 ); + EXPECT_EQ( m.getElement( 8, 8 ), 1 ); + EXPECT_EQ( m.getElement( 8, 9 ), 1 ); + + EXPECT_EQ( m.getElement( 9, 0 ), 1 ); + EXPECT_EQ( m.getElement( 9, 1 ), 1 ); + EXPECT_EQ( m.getElement( 9, 2 ), 1 ); + EXPECT_EQ( m.getElement( 9, 3 ), 1 ); + EXPECT_EQ( m.getElement( 9, 4 ), 1 ); + EXPECT_EQ( m.getElement( 9, 5 ), 1 ); + EXPECT_EQ( m.getElement( 9, 6 ), 1 ); + EXPECT_EQ( m.getElement( 9, 
7 ), 1 ); + EXPECT_EQ( m.getElement( 9, 8 ), 1 ); + EXPECT_EQ( m.getElement( 9, 9 ), 1 ); +} + + +template< typename Matrix > +void test_SetElement() +{ + using RealType = typename Matrix::RealType; + using DeviceType = typename Matrix::DeviceType; + using IndexType = typename Matrix::IndexType; + + /* + * Sets up the following 10x10 sparse matrix: + * + * / 1 0 1 0 1 0 1 0 0 0 \ + * | 1 1 1 0 0 0 0 0 0 0 | + * | 1 1 1 1 1 1 1 1 0 0 | + * | 1 1 0 0 0 0 0 0 0 0 | + * | 1 0 0 0 0 0 0 0 0 0 | + * | 1 0 0 0 0 0 0 0 0 0 | + * | 1 0 0 0 0 0 0 0 0 0 | + * | 1 0 0 0 0 0 0 0 0 0 | + * | 1 1 1 1 1 1 1 1 1 1 | + * \ 1 1 1 1 1 1 1 1 1 1 / + */ + + const IndexType rows = 10; + const IndexType cols = 10; + + Matrix m( rows, cols ); + + typename Matrix::CompressedRowLengthsVector rowLengths; + rowLengths.setSize( rows ); + rowLengths.setElement( 0, 4 ); + rowLengths.setElement( 1, 3 ); + rowLengths.setElement( 2, 8 ); + rowLengths.setElement( 3, 2 ); + for( IndexType i = 4; i < rows - 2; i++ ) + rowLengths.setElement( i, 1 ); + + rowLengths.setElement( 8, 10 ); + rowLengths.setElement( 9, 10 ); + m.setCompressedRowLengths( rowLengths ); + + for( IndexType i = 0; i < 4; i++ ) + m.setElement( 0, 2 * i, 1 ); + + for( IndexType i = 0; i < 3; i++ ) + m.setElement( 1, i, 1 ); + + for( IndexType i = 0; i < 8; i++ ) + m.setElement( 2, i, 1 ); + + for( IndexType i = 0; i < 2; i++ ) + m.setElement( 3, i, 1 ); + + for( IndexType i = 4; i < 8; i++ ) + m.setElement( i, 0, 1 ); + + for( IndexType j = 8; j < rows; j++) + for( IndexType i = 0; i < cols; i++ ) + m.setElement( j, i, 1 ); + + EXPECT_EQ( m.getElement( 0, 0 ), 1 ); + EXPECT_EQ( m.getElement( 0, 1 ), 0 ); + EXPECT_EQ( m.getElement( 0, 2 ), 1 ); + EXPECT_EQ( m.getElement( 0, 3 ), 0 ); + EXPECT_EQ( m.getElement( 0, 4 ), 1 ); + EXPECT_EQ( m.getElement( 0, 5 ), 0 ); + EXPECT_EQ( m.getElement( 0, 6 ), 1 ); + EXPECT_EQ( m.getElement( 0, 7 ), 0 ); + EXPECT_EQ( m.getElement( 0, 8 ), 0 ); + EXPECT_EQ( m.getElement( 0, 9 ), 0 ); + + EXPECT_EQ( m.getElement( 1, 0 ), 1 ); + EXPECT_EQ( m.getElement( 1, 1 ), 1 ); + EXPECT_EQ( m.getElement( 1, 2 ), 1 ); + EXPECT_EQ( m.getElement( 1, 3 ), 0 ); + EXPECT_EQ( m.getElement( 1, 4 ), 0 ); + EXPECT_EQ( m.getElement( 1, 5 ), 0 ); + EXPECT_EQ( m.getElement( 1, 6 ), 0 ); + EXPECT_EQ( m.getElement( 1, 7 ), 0 ); + EXPECT_EQ( m.getElement( 1, 8 ), 0 ); + EXPECT_EQ( m.getElement( 1, 9 ), 0 ); + + EXPECT_EQ( m.getElement( 2, 0 ), 1 ); + EXPECT_EQ( m.getElement( 2, 1 ), 1 ); + EXPECT_EQ( m.getElement( 2, 2 ), 1 ); + EXPECT_EQ( m.getElement( 2, 3 ), 1 ); + EXPECT_EQ( m.getElement( 2, 4 ), 1 ); + EXPECT_EQ( m.getElement( 2, 5 ), 1 ); + EXPECT_EQ( m.getElement( 2, 6 ), 1 ); + EXPECT_EQ( m.getElement( 2, 7 ), 1 ); + EXPECT_EQ( m.getElement( 2, 8 ), 0 ); + EXPECT_EQ( m.getElement( 2, 9 ), 0 ); + + EXPECT_EQ( m.getElement( 3, 0 ), 1 ); + EXPECT_EQ( m.getElement( 3, 1 ), 1 ); + EXPECT_EQ( m.getElement( 3, 2 ), 0 ); + EXPECT_EQ( m.getElement( 3, 3 ), 0 ); + EXPECT_EQ( m.getElement( 3, 4 ), 0 ); + EXPECT_EQ( m.getElement( 3, 5 ), 0 ); + EXPECT_EQ( m.getElement( 3, 6 ), 0 ); + EXPECT_EQ( m.getElement( 3, 7 ), 0 ); + EXPECT_EQ( m.getElement( 3, 8 ), 0 ); + EXPECT_EQ( m.getElement( 3, 9 ), 0 ); + + EXPECT_EQ( m.getElement( 4, 0 ), 1 ); + EXPECT_EQ( m.getElement( 4, 1 ), 0 ); + EXPECT_EQ( m.getElement( 4, 2 ), 0 ); + EXPECT_EQ( m.getElement( 4, 3 ), 0 ); + EXPECT_EQ( m.getElement( 4, 4 ), 0 ); + EXPECT_EQ( m.getElement( 4, 5 ), 0 ); + EXPECT_EQ( m.getElement( 4, 6 ), 0 ); + EXPECT_EQ( m.getElement( 4, 7 ), 0 ); + EXPECT_EQ( m.getElement( 4, 8 ), 0 ); + 
EXPECT_EQ( m.getElement( 4, 9 ), 0 ); + + EXPECT_EQ( m.getElement( 5, 0 ), 1 ); + EXPECT_EQ( m.getElement( 5, 1 ), 0 ); + EXPECT_EQ( m.getElement( 5, 2 ), 0 ); + EXPECT_EQ( m.getElement( 5, 3 ), 0 ); + EXPECT_EQ( m.getElement( 5, 4 ), 0 ); + EXPECT_EQ( m.getElement( 5, 5 ), 0 ); + EXPECT_EQ( m.getElement( 5, 6 ), 0 ); + EXPECT_EQ( m.getElement( 5, 7 ), 0 ); + EXPECT_EQ( m.getElement( 5, 8 ), 0 ); + EXPECT_EQ( m.getElement( 5, 9 ), 0 ); + + EXPECT_EQ( m.getElement( 6, 0 ), 1 ); + EXPECT_EQ( m.getElement( 6, 1 ), 0 ); + EXPECT_EQ( m.getElement( 6, 2 ), 0 ); + EXPECT_EQ( m.getElement( 6, 3 ), 0 ); + EXPECT_EQ( m.getElement( 6, 4 ), 0 ); + EXPECT_EQ( m.getElement( 6, 5 ), 0 ); + EXPECT_EQ( m.getElement( 6, 6 ), 0 ); + EXPECT_EQ( m.getElement( 6, 7 ), 0 ); + EXPECT_EQ( m.getElement( 6, 8 ), 0 ); + EXPECT_EQ( m.getElement( 6, 9 ), 0 ); + + EXPECT_EQ( m.getElement( 7, 0 ), 1 ); + EXPECT_EQ( m.getElement( 7, 1 ), 0 ); + EXPECT_EQ( m.getElement( 7, 2 ), 0 ); + EXPECT_EQ( m.getElement( 7, 3 ), 0 ); + EXPECT_EQ( m.getElement( 7, 4 ), 0 ); + EXPECT_EQ( m.getElement( 7, 5 ), 0 ); + EXPECT_EQ( m.getElement( 7, 6 ), 0 ); + EXPECT_EQ( m.getElement( 7, 7 ), 0 ); + EXPECT_EQ( m.getElement( 7, 8 ), 0 ); + EXPECT_EQ( m.getElement( 7, 9 ), 0 ); + + EXPECT_EQ( m.getElement( 8, 0 ), 1 ); + EXPECT_EQ( m.getElement( 8, 1 ), 1 ); + EXPECT_EQ( m.getElement( 8, 2 ), 1 ); + EXPECT_EQ( m.getElement( 8, 3 ), 1 ); + EXPECT_EQ( m.getElement( 8, 4 ), 1 ); + EXPECT_EQ( m.getElement( 8, 5 ), 1 ); + EXPECT_EQ( m.getElement( 8, 6 ), 1 ); + EXPECT_EQ( m.getElement( 8, 7 ), 1 ); + EXPECT_EQ( m.getElement( 8, 8 ), 1 ); + EXPECT_EQ( m.getElement( 8, 9 ), 1 ); + + EXPECT_EQ( m.getElement( 9, 0 ), 1 ); + EXPECT_EQ( m.getElement( 9, 1 ), 1 ); + EXPECT_EQ( m.getElement( 9, 2 ), 1 ); + EXPECT_EQ( m.getElement( 9, 3 ), 1 ); + EXPECT_EQ( m.getElement( 9, 4 ), 1 ); + EXPECT_EQ( m.getElement( 9, 5 ), 1 ); + EXPECT_EQ( m.getElement( 9, 6 ), 1 ); + EXPECT_EQ( m.getElement( 9, 7 ), 1 ); + EXPECT_EQ( m.getElement( 9, 8 ), 1 ); + EXPECT_EQ( m.getElement( 9, 9 ), 1 ); +} + +template< typename Matrix > +void test_VectorProduct() +{ + using RealType = typename Matrix::RealType; + using DeviceType = typename Matrix::DeviceType; + using IndexType = typename Matrix::IndexType; + using VectorType = TNL::Containers::Vector< RealType, DeviceType, IndexType >; + + /* + * Sets up the following 4x4 sparse matrix: + * + * / 1 0 0 0 \ + * | 0 1 0 1 | + * | 0 1 0 0 | + * \ 0 0 1 0 / + */ + + const IndexType m_rows_1 = 4; + const IndexType m_cols_1 = 4; + + Matrix m_1( m_rows_1, m_cols_1 ); + typename Matrix::CompressedRowLengthsVector rowLengths_1; + rowLengths_1.setSize( m_rows_1 ); + rowLengths_1.setElement( 0, 1 ); + rowLengths_1.setElement( 1, 2 ); + rowLengths_1.setElement( 2, 1 ); + rowLengths_1.setElement( 3, 1 ); + m_1.setCompressedRowLengths( rowLengths_1 ); + + m_1.setElement( 0, 0, 1 ); // 0th row + + m_1.setElement( 1, 1, 1 ); // 1st row + m_1.setElement( 1, 3, 1 ); + + m_1.setElement( 2, 1, 1 ); // 2nd row + + m_1.setElement( 3, 2, 1 ); // 3rd row + + VectorType inVector_1( m_cols_1 ); + inVector_1 = 2.0; + + VectorType outVector_1( m_rows_1 ); + outVector_1 = 0.0; + + m_1.vectorProduct( inVector_1, outVector_1 ); + + + EXPECT_EQ( outVector_1.getElement( 0 ), 2 ); + EXPECT_EQ( outVector_1.getElement( 1 ), 4 ); + EXPECT_EQ( outVector_1.getElement( 2 ), 2 ); + EXPECT_EQ( outVector_1.getElement( 3 ), 2 ); + + + /* + * Sets up the following 4x4 sparse matrix: + * + * / 1 1 1 0 \ + * | 0 0 0 1 | + * | 1 1 1 0 | + * \ 0 1 0 0 / + */ + const IndexType 
m_rows_2 = 4; + const IndexType m_cols_2 = 4; + + Matrix m_2( m_rows_2, m_cols_2 ); + typename Matrix::CompressedRowLengthsVector rowLengths_2; + rowLengths_2.setSize( m_rows_2 ); + rowLengths_2.setValue( 3 ); + rowLengths_2.setElement( 1, 1 ); + rowLengths_2.setElement( 3, 1 ); + m_2.setCompressedRowLengths( rowLengths_2 ); + + for( IndexType i = 0; i < 3; i++ ) // 0th row + m_2.setElement( 0, i, 1 ); + + m_2.setElement( 1, 3, 1 ); // 1st row + + for( IndexType i = 0; i < 3; i++ ) // 2nd row + m_2.setElement( 2, i, 1 ); + + for( IndexType i = 1; i < 2; i++ ) // 3rd row + m_2.setElement( 3, i, 1 ); + + VectorType inVector_2( m_cols_2 ); + inVector_2 = 2.0; + + VectorType outVector_2( m_rows_2 ); + outVector_2 = 0.0; + + m_2.vectorProduct( inVector_2, outVector_2 ); + + EXPECT_EQ( outVector_2.getElement( 0 ), 6 ); + EXPECT_EQ( outVector_2.getElement( 1 ), 2 ); + EXPECT_EQ( outVector_2.getElement( 2 ), 6 ); + EXPECT_EQ( outVector_2.getElement( 3 ), 2 ); + + /* + * Sets up the following 4x4 sparse matrix: + * + * / 1 1 1 0 \ + * | 0 1 1 1 | + * | 1 1 1 0 | + * \ 0 1 1 1 / + */ + const IndexType m_rows_3 = 4; + const IndexType m_cols_3 = 4; + + Matrix m_3( m_rows_3, m_cols_3 ); + typename Matrix::CompressedRowLengthsVector rowLengths_3; + rowLengths_3.setSize( m_rows_3 ); + rowLengths_3.setValue( 3 ); + m_3.setCompressedRowLengths( rowLengths_3 ); + + for( IndexType i = 0; i < 3; i++ ) // 0th row + m_3.setElement( 0, i, 1 ); + + for( IndexType i = 1; i < 4; i++ ) + m_3.setElement( 1, i, 1 ); // 1st row + + for( IndexType i = 0; i < 3; i++ ) // 2nd row + m_3.setElement( 2, i, 1 ); + + for( IndexType i = 1; i < 4; i++ ) // 3rd row + m_3.setElement( 3, i, 1 ); + + VectorType inVector_3( m_cols_3 ); + inVector_3 = 2.0; + + VectorType outVector_3( m_rows_3 ); + outVector_3 = 0.0; + + m_3.vectorProduct( inVector_3, outVector_3 ); + + + EXPECT_EQ( outVector_3.getElement( 0 ), 6 ); + EXPECT_EQ( outVector_3.getElement( 1 ), 6 ); + EXPECT_EQ( outVector_3.getElement( 2 ), 6 ); + EXPECT_EQ( outVector_3.getElement( 3 ), 6 ); + + /* + * Sets up the following 8x8 sparse matrix: + * + * / 1 1 1 0 0 1 0 0 \ + * | 0 1 1 1 1 0 0 0 | + * | 1 1 1 1 1 0 0 0 | + * | 0 1 1 1 1 0 0 0 | + * | 0 0 1 1 1 1 0 0 | + * | 0 0 0 1 1 1 1 0 | + * | 1 1 1 1 1 0 0 0 | + * \ 1 1 1 1 1 0 0 0 / + */ + const IndexType m_rows_4 = 8; + const IndexType m_cols_4 = 8; + + Matrix m_4( m_rows_4, m_cols_4 ); + typename Matrix::CompressedRowLengthsVector rowLengths_4; + rowLengths_4.setSize( m_rows_4 ); + rowLengths_4.setValue( 4 ); + rowLengths_4.setElement( 2, 5 ); + rowLengths_4.setElement( 6, 5 ); + rowLengths_4.setElement( 7, 5 ); + m_4.setCompressedRowLengths( rowLengths_4 ); + + for( IndexType i = 0; i < 3; i++ ) // 0th row + m_4.setElement( 0, i, 1 ); + + m_4.setElement( 0, 5, 1 ); + + for( IndexType i = 1; i < 5; i++ ) // 1st row + m_4.setElement( 1, i, 1 ); + + for( IndexType i = 0; i < 5; i++ ) // 2nd row + m_4.setElement( 2, i, 1 ); + + for( IndexType i = 1; i < 5; i++ ) // 3rd row + m_4.setElement( 3, i, 1 ); + + for( IndexType i = 2; i < 6; i++ ) // 4th row + m_4.setElement( 4, i, 1 ); + + for( IndexType i = 3; i < 7; i++ ) // 5th row + m_4.setElement( 5, i, 1 ); + + for( IndexType i = 0; i < 5; i++ ) // 6th row + m_4.setElement( 6, i, 1 ); + + for( IndexType i = 0; i < 5; i++ ) // 7th row + m_4.setElement( 7, i, 1 ); + + VectorType inVector_4( m_cols_4 ); + inVector_4 = 2.0; + + VectorType outVector_4( m_rows_4 ); + outVector_4 = 0.0; + + m_4.vectorProduct( inVector_4, outVector_4 ); + + + EXPECT_EQ( outVector_4.getElement( 0 
), 8 ); + EXPECT_EQ( outVector_4.getElement( 1 ), 8 ); + EXPECT_EQ( outVector_4.getElement( 2 ), 10 ); + EXPECT_EQ( outVector_4.getElement( 3 ), 8 ); + EXPECT_EQ( outVector_4.getElement( 4 ), 8 ); + EXPECT_EQ( outVector_4.getElement( 5 ), 8 ); + EXPECT_EQ( outVector_4.getElement( 6 ), 10 ); + EXPECT_EQ( outVector_4.getElement( 7 ), 10 ); + + + /* + * Sets up the following 8x8 sparse matrix: + * + * / 1 1 1 0 1 1 0 1 \ 6 + * | 0 1 0 1 0 0 0 1 | 3 + * | 0 1 1 0 1 0 0 1 | 4 + * | 0 1 1 1 1 0 0 1 | 5 + * | 0 1 0 0 0 0 0 1 | 2 + * | 0 1 1 1 1 1 1 1 | 7 + * | 1 1 1 1 1 1 1 1 | 8 + * \ 1 1 1 1 1 1 1 1 / 8 + */ + + const IndexType m_rows_5 = 8; + const IndexType m_cols_5 = 8; + + Matrix m_5( m_rows_5, m_cols_5 ); + typename Matrix::CompressedRowLengthsVector rowLengths_5; + rowLengths_5.setSize( m_rows_5 ); + rowLengths_5.setElement(0, 6); + rowLengths_5.setElement(1, 3); + rowLengths_5.setElement(2, 4); + rowLengths_5.setElement(3, 5); + rowLengths_5.setElement(4, 2); + rowLengths_5.setElement(5, 7); + rowLengths_5.setElement(6, 8); + rowLengths_5.setElement(7, 8); + m_5.setCompressedRowLengths( rowLengths_5 ); + + RealType value_5 = 1; + for( IndexType i = 0; i < 3; i++ ) // 0th row + m_5.setElement( 0, i, 1 ); + + m_5.setElement( 0, 4, 1 ); // 0th row + m_5.setElement( 0, 5, 1 ); + + m_5.setElement( 1, 1, 1 ); // 1st row + m_5.setElement( 1, 3, 1 ); + + for( IndexType i = 1; i < 3; i++ ) // 2nd row + m_5.setElement( 2, i, 1 ); + + m_5.setElement( 2, 4, 1 ); // 2nd row + + for( IndexType i = 1; i < 5; i++ ) // 3rd row + m_5.setElement( 3, i, 1 ); + + m_5.setElement( 4, 1, 1 ); // 4th row + + for( IndexType i = 1; i < 7; i++ ) // 5th row + m_5.setElement( 5, i, 1 ); + + for( IndexType i = 0; i < 7; i++ ) // 6th row + m_5.setElement( 6, i, 1 ); + + for( IndexType i = 0; i < 8; i++ ) // 7th row + m_5.setElement( 7, i, 1 ); + + for( IndexType i = 0; i < 7; i++ ) // 1s at the end of rows + m_5.setElement( i, 7, 1); + + VectorType inVector_5( m_cols_5 ); + inVector_5 = 2.0; + + VectorType outVector_5( m_rows_5 ); + outVector_5 = 0.0; + + m_5.vectorProduct( inVector_5, outVector_5 ); + + EXPECT_EQ( outVector_5.getElement( 0 ), 12 ); + EXPECT_EQ( outVector_5.getElement( 1 ), 6 ); + EXPECT_EQ( outVector_5.getElement( 2 ), 8 ); + EXPECT_EQ( outVector_5.getElement( 3 ), 10 ); + EXPECT_EQ( outVector_5.getElement( 4 ), 4 ); + EXPECT_EQ( outVector_5.getElement( 5 ), 14 ); + EXPECT_EQ( outVector_5.getElement( 6 ), 16 ); + EXPECT_EQ( outVector_5.getElement( 7 ), 16 ); +} + +template< typename Matrix > +void test_RowsReduction() +{ + using RealType = typename Matrix::RealType; + using DeviceType = typename Matrix::DeviceType; + using IndexType = typename Matrix::IndexType; + + /* + * Sets up the following 8x8 sparse matrix: + * + * / 1 1 1 0 1 1 0 1 \ 6 + * | 0 1 0 1 0 0 0 1 | 3 + * | 0 1 1 0 1 0 0 1 | 4 + * | 0 1 1 1 1 0 0 1 | 5 + * | 0 1 0 0 0 0 0 1 | 2 + * | 0 1 1 1 1 1 1 1 | 7 + * | 1 1 1 1 1 1 1 1 | 8 + * \ 1 1 1 1 1 1 1 1 / 8 + */ + + const IndexType rows = 8; + const IndexType cols = 8; + + Matrix m( rows, cols ); + typename Matrix::RowsCapacitiesType rowsCapacities( rows ); + //rowLengths.setSize( rows ); + rowsCapacities.setElement(0, 6); + rowsCapacities.setElement(1, 3); + rowsCapacities.setElement(2, 4); + rowsCapacities.setElement(3, 5); + rowsCapacities.setElement(4, 2); + rowsCapacities.setElement(5, 7); + rowsCapacities.setElement(6, 8); + rowsCapacities.setElement(7, 8); + m.setCompressedRowLengths( rowsCapacities ); + + for( IndexType i = 0; i < 3; i++ ) // 0th row + m.setElement( 0, i, 1 ); + 
+ m.setElement( 0, 4, 1 ); // 0th row + m.setElement( 0, 5, 1 ); + + m.setElement( 1, 1, 1 ); // 1st row + m.setElement( 1, 3, 1 ); + + for( IndexType i = 1; i < 3; i++ ) // 2nd row + m.setElement( 2, i, 1 ); + + m.setElement( 2, 4, 1 ); // 2nd row + + for( IndexType i = 1; i < 5; i++ ) // 3rd row + m.setElement( 3, i, 1 ); + + m.setElement( 4, 1, 1 ); // 4th row + + for( IndexType i = 1; i < 7; i++ ) // 5th row + m.setElement( 5, i, 1 ); + + for( IndexType i = 0; i < 7; i++ ) // 6th row + m.setElement( 6, i, 1 ); + + for( IndexType i = 0; i < 8; i++ ) // 7th row + m.setElement( 7, i, 1 ); + + for( IndexType i = 0; i < 7; i++ ) // 1s at the end of rows + m.setElement( i, 7, 1); + + //// + // Compute number of non-zero elements in rows. + typename Matrix::RowsCapacitiesType rowLengths( rows ); + auto rowLengths_view = rowLengths.getView(); + auto fetch = [] __cuda_callable__ ( IndexType row, IndexType column, IndexType globalIdx, const RealType& value ) -> IndexType { + return ( value != 0.0 ); + }; + auto reduce = [] __cuda_callable__ ( IndexType& aux, const IndexType a ) { + aux += a; + }; + auto keep = [=] __cuda_callable__ ( const IndexType rowIdx, const IndexType value ) mutable { + rowLengths_view[ rowIdx ] = value; + }; + m.allRowsReduction( fetch, reduce, keep, 0 ); + EXPECT_EQ( rowsCapacities, rowLengths ); + m.getCompressedRowLengths( rowLengths ); + EXPECT_EQ( rowsCapacities, rowLengths ); + + //// + // Compute the max norm, i.e. the maximum of the row sums of absolute values. + TNL::Containers::Vector< RealType, DeviceType, IndexType > rowSums( rows ); + auto rowSums_view = rowSums.getView(); + auto max_fetch = [] __cuda_callable__ ( IndexType row, IndexType column, IndexType globalIdx, const RealType& value ) -> IndexType { + return abs( value ); + }; + auto max_reduce = [] __cuda_callable__ ( IndexType& aux, const IndexType a ) { + aux += a; + }; + auto max_keep = [=] __cuda_callable__ ( const IndexType rowIdx, const IndexType value ) mutable { + rowSums_view[ rowIdx ] = value; + }; + m.allRowsReduction( max_fetch, max_reduce, max_keep, 0 ); + const RealType maxNorm = TNL::max( rowSums ); + EXPECT_EQ( maxNorm, 8 ); // rows 6 and 7 contain eight nonzero elements each +} + +template< typename Matrix > +void test_PerformSORIteration() +{ + using RealType = typename Matrix::RealType; + using DeviceType = typename Matrix::DeviceType; + using IndexType = typename Matrix::IndexType; + + /* + * Sets up the following 4x4 sparse matrix: + * + * / 4 1 0 0 \ + * | 1 4 1 0 | + * | 0 1 4 1 | + * \ 0 0 1 4 / + */ + + const IndexType m_rows = 4; + const IndexType m_cols = 4; + + Matrix m( m_rows, m_cols ); + typename Matrix::CompressedRowLengthsVector rowLengths; + rowLengths.setSize( m_rows ); + rowLengths.setValue( 3 ); + m.setCompressedRowLengths( rowLengths ); + + m.setElement( 0, 0, 4.0 ); // 0th row + m.setElement( 0, 1, 1.0 ); + + m.setElement( 1, 0, 1.0 ); // 1st row + m.setElement( 1, 1, 4.0 ); + m.setElement( 1, 2, 1.0 ); + + m.setElement( 2, 1, 1.0 ); // 2nd row + m.setElement( 2, 2, 4.0 ); + m.setElement( 2, 3, 1.0 ); + + m.setElement( 3, 2, 1.0 ); // 3rd row + m.setElement( 3, 3, 4.0 ); + + RealType bVector [ 4 ] = { 1, 1, 1, 1 }; + RealType xVector [ 4 ] = { 1, 1, 1, 1 }; + + IndexType row = 0; + RealType omega = 1; + + + m.performSORIteration( bVector, row++, xVector, omega); + + EXPECT_EQ( xVector[ 0 ], 0.0 ); + EXPECT_EQ( xVector[ 1 ], 1.0 ); + EXPECT_EQ( xVector[ 2 ], 1.0 ); + EXPECT_EQ( xVector[ 3 ], 1.0 ); + + + m.performSORIteration( bVector, row++, xVector, omega); + + EXPECT_EQ( xVector[ 0 ], 0.0 ); + EXPECT_EQ( xVector[ 1 ], 0.0 ); + EXPECT_EQ( 
xVector[ 2 ], 1.0 ); + EXPECT_EQ( xVector[ 3 ], 1.0 ); + + + m.performSORIteration( bVector, row++, xVector, omega); + + EXPECT_EQ( xVector[ 0 ], 0.0 ); + EXPECT_EQ( xVector[ 1 ], 0.0 ); + EXPECT_EQ( xVector[ 2 ], 0.0 ); + EXPECT_EQ( xVector[ 3 ], 1.0 ); + + + m.performSORIteration( bVector, row++, xVector, omega); + + EXPECT_EQ( xVector[ 0 ], 0.0 ); + EXPECT_EQ( xVector[ 1 ], 0.0 ); + EXPECT_EQ( xVector[ 2 ], 0.0 ); + EXPECT_EQ( xVector[ 3 ], 0.25 ); +} + +template< typename Matrix > +void test_SaveAndLoad( const char* filename ) +{ + using RealType = typename Matrix::RealType; + using DeviceType = typename Matrix::DeviceType; + using IndexType = typename Matrix::IndexType; + + /* + * Sets up the following 4x4 sparse matrix: + * + * / 1 1 1 0 \ + * | 0 1 0 1 | + * | 1 1 1 0 | + * \ 0 1 1 1 / + */ + + const IndexType m_rows = 4; + const IndexType m_cols = 4; + + Matrix savedMatrix( m_rows, m_cols ); + typename Matrix::CompressedRowLengthsVector rowLengths( m_rows, 3 ); + savedMatrix.setCompressedRowLengths( rowLengths ); + + for( IndexType i = 0; i < m_cols - 1; i++ ) // 0th row + savedMatrix.setElement( 0, i, 1 ); + + savedMatrix.setElement( 1, 1, 1 ); + savedMatrix.setElement( 1, 3, 1 ); // 1st row + + for( IndexType i = 0; i < m_cols - 1; i++ ) // 2nd row + savedMatrix.setElement( 2, i, 1 ); + + for( IndexType i = 1; i < m_cols; i++ ) // 3rd row + savedMatrix.setElement( 3, i, 1 ); + + ASSERT_NO_THROW( savedMatrix.save( filename ) ); + + Matrix loadedMatrix; + ASSERT_NO_THROW( loadedMatrix.load( filename ) ); + + + EXPECT_EQ( savedMatrix.getElement( 0, 0 ), loadedMatrix.getElement( 0, 0 ) ); + EXPECT_EQ( savedMatrix.getElement( 0, 1 ), loadedMatrix.getElement( 0, 1 ) ); + EXPECT_EQ( savedMatrix.getElement( 0, 2 ), loadedMatrix.getElement( 0, 2 ) ); + EXPECT_EQ( savedMatrix.getElement( 0, 3 ), loadedMatrix.getElement( 0, 3 ) ); + + EXPECT_EQ( savedMatrix.getElement( 1, 0 ), loadedMatrix.getElement( 1, 0 ) ); + EXPECT_EQ( savedMatrix.getElement( 1, 1 ), loadedMatrix.getElement( 1, 1 ) ); + EXPECT_EQ( savedMatrix.getElement( 1, 2 ), loadedMatrix.getElement( 1, 2 ) ); + EXPECT_EQ( savedMatrix.getElement( 1, 3 ), loadedMatrix.getElement( 1, 3 ) ); + + EXPECT_EQ( savedMatrix.getElement( 2, 0 ), loadedMatrix.getElement( 2, 0 ) ); + EXPECT_EQ( savedMatrix.getElement( 2, 1 ), loadedMatrix.getElement( 2, 1 ) ); + EXPECT_EQ( savedMatrix.getElement( 2, 2 ), loadedMatrix.getElement( 2, 2 ) ); + EXPECT_EQ( savedMatrix.getElement( 2, 3 ), loadedMatrix.getElement( 2, 3 ) ); + + EXPECT_EQ( savedMatrix.getElement( 3, 0 ), loadedMatrix.getElement( 3, 0 ) ); + EXPECT_EQ( savedMatrix.getElement( 3, 1 ), loadedMatrix.getElement( 3, 1 ) ); + EXPECT_EQ( savedMatrix.getElement( 3, 2 ), loadedMatrix.getElement( 3, 2 ) ); + EXPECT_EQ( savedMatrix.getElement( 3, 3 ), loadedMatrix.getElement( 3, 3 ) ); + + EXPECT_EQ( savedMatrix.getElement( 0, 0 ), 1 ); + EXPECT_EQ( savedMatrix.getElement( 0, 1 ), 1 ); + EXPECT_EQ( savedMatrix.getElement( 0, 2 ), 1 ); + EXPECT_EQ( savedMatrix.getElement( 0, 3 ), 0 ); + + EXPECT_EQ( savedMatrix.getElement( 1, 0 ), 0 ); + EXPECT_EQ( savedMatrix.getElement( 1, 1 ), 1 ); + EXPECT_EQ( savedMatrix.getElement( 1, 2 ), 0 ); + EXPECT_EQ( savedMatrix.getElement( 1, 3 ), 1 ); + + EXPECT_EQ( savedMatrix.getElement( 2, 0 ), 1 ); + EXPECT_EQ( savedMatrix.getElement( 2, 1 ), 1 ); + EXPECT_EQ( savedMatrix.getElement( 2, 2 ), 1 ); + EXPECT_EQ( savedMatrix.getElement( 2, 3 ), 0 ); + + EXPECT_EQ( savedMatrix.getElement( 3, 0 ), 0 ); + EXPECT_EQ( savedMatrix.getElement( 3, 1 ), 1 ); + EXPECT_EQ( 
savedMatrix.getElement( 3, 2 ), 1 ); + EXPECT_EQ( savedMatrix.getElement( 3, 3 ), 1 ); + + EXPECT_EQ( std::remove( filename ), 0 ); +} + +template< typename Matrix > +void test_Print() +{ + using RealType = typename Matrix::RealType; + using DeviceType = typename Matrix::DeviceType; + using IndexType = typename Matrix::IndexType; + + /* + * Sets up the following 5x4 sparse matrix: + * + * / 1 1 1 0 \ + * | 0 0 0 1 | + * | 1 1 1 0 | + * | 0 1 1 1 | + * \ 0 0 1 1 / + */ + + const IndexType m_rows = 5; + const IndexType m_cols = 4; + + Matrix m( m_rows, m_cols ); + typename Matrix::CompressedRowLengthsVector rowLengths( m_rows, 3 ); + m.setCompressedRowLengths( rowLengths ); + + RealType value = 1; + for( IndexType i = 0; i < m_cols - 1; i++ ) // 0th row + m.setElement( 0, i, 1 ); + + m.setElement( 1, 3, 1 ); // 1st row + + for( IndexType i = 0; i < m_cols - 1; i++ ) // 2nd row + m.setElement( 2, i, 1 ); + + for( IndexType i = 1; i < m_cols; i++ ) // 3rd row + m.setElement( 3, i, 1 ); + + for( IndexType i = 2; i < m_cols; i++ ) // 4th row + m.setElement( 4, i, 1 ); + + std::stringstream printed; + std::stringstream couted; + + //change the underlying buffer and save the old buffer + auto old_buf = std::cout.rdbuf(printed.rdbuf()); + + m.print( std::cout ); //all the std::cout goes to ss + + std::cout.rdbuf(old_buf); //reset + + couted << "Row: 0 -> Col:0->1 Col:1->1 Col:2->1\t\n" + "Row: 1 -> Col:3->1\t\n" + "Row: 2 -> Col:0->1 Col:1->1 Col:2->1\t\n" + "Row: 3 -> Col:1->1 Col:2->1 Col:3->1\t\n" + "Row: 4 -> Col:2->1 Col:3->1\t\n"; + + EXPECT_EQ( printed.str(), couted.str() ); +} + +#endif diff --git a/src/UnitTests/Matrices/BinarySparseMatrixTest_CSR.cpp b/src/UnitTests/Matrices/BinarySparseMatrixTest_CSR.cpp new file mode 100644 index 0000000000000000000000000000000000000000..b1f4891053e1f1ad51873d3e6f8797d8477914b2 --- /dev/null +++ b/src/UnitTests/Matrices/BinarySparseMatrixTest_CSR.cpp @@ -0,0 +1,11 @@ +/*************************************************************************** + BinarySparseMatrixTest_CSR.cpp - description + ------------------- + begin : Jan 30, 2020 + copyright : (C) 2020 by Tomas Oberhuber et al. + email : tomas.oberhuber@fjfi.cvut.cz + ***************************************************************************/ + +/* See Copyright Notice in tnl/Copyright */ + +#include "BinarySparseMatrixTest_CSR.h" \ No newline at end of file diff --git a/src/UnitTests/Matrices/BinarySparseMatrixTest_CSR.cu b/src/UnitTests/Matrices/BinarySparseMatrixTest_CSR.cu new file mode 100644 index 0000000000000000000000000000000000000000..496bdde1b301ab06cd73221b17acb27ba7a80b34 --- /dev/null +++ b/src/UnitTests/Matrices/BinarySparseMatrixTest_CSR.cu @@ -0,0 +1,11 @@ +/*************************************************************************** + BinarySparseMatrixTest_CSR.cu - description + ------------------- + begin : Jan 30, 2020 + copyright : (C) 2020 by Tomas Oberhuber et al. 
+ email : tomas.oberhuber@fjfi.cvut.cz + ***************************************************************************/ + +/* See Copyright Notice in tnl/Copyright */ + +#include "BinarySparseMatrixTest_CSR.h" \ No newline at end of file diff --git a/src/UnitTests/Matrices/BinarySparseMatrixTest_CSR.h b/src/UnitTests/Matrices/BinarySparseMatrixTest_CSR.h new file mode 100644 index 0000000000000000000000000000000000000000..a853281bef7b1bb9a3cb4985b6a3a53ba519ee45 --- /dev/null +++ b/src/UnitTests/Matrices/BinarySparseMatrixTest_CSR.h @@ -0,0 +1,127 @@ +/*************************************************************************** + BinarySparseMatrixTest_CSR.h - description + ------------------- + begin : Jan 30, 2020 + copyright : (C) 2020 by Tomas Oberhuber et al. + email : tomas.oberhuber@fjfi.cvut.cz + ***************************************************************************/ + +/* See Copyright Notice in tnl/Copyright */ + +#include <TNL/Containers/Segments/CSR.h> +#include <TNL/Matrices/SparseMatrix.h> + + +#include "BinarySparseMatrixTest.hpp" +#include <iostream> + +#ifdef HAVE_GTEST +#include <gtest/gtest.h> + +// test fixture for typed tests +template< typename Matrix > +class BinaryMatrixTest_CSR : public ::testing::Test +{ +protected: + using CSRMatrixType = Matrix; +}; + +// types for which MatrixTest is instantiated +using CSRMatrixTypes = ::testing::Types +< + TNL::Matrices::SparseMatrix< int, TNL::Devices::Host, int, TNL::Matrices::BinaryMatrix, TNL::Containers::Segments::CSR >, + TNL::Matrices::SparseMatrix< long, TNL::Devices::Host, int, TNL::Matrices::BinaryMatrix, TNL::Containers::Segments::CSR >, + TNL::Matrices::SparseMatrix< float, TNL::Devices::Host, int, TNL::Matrices::BinaryMatrix, TNL::Containers::Segments::CSR >, + TNL::Matrices::SparseMatrix< double, TNL::Devices::Host, int, TNL::Matrices::BinaryMatrix, TNL::Containers::Segments::CSR >, + TNL::Matrices::SparseMatrix< int, TNL::Devices::Host, long, TNL::Matrices::BinaryMatrix, TNL::Containers::Segments::CSR >, + TNL::Matrices::SparseMatrix< long, TNL::Devices::Host, long, TNL::Matrices::BinaryMatrix, TNL::Containers::Segments::CSR >, + TNL::Matrices::SparseMatrix< float, TNL::Devices::Host, long, TNL::Matrices::BinaryMatrix, TNL::Containers::Segments::CSR >, + TNL::Matrices::SparseMatrix< double, TNL::Devices::Host, long, TNL::Matrices::BinaryMatrix, TNL::Containers::Segments::CSR > +#ifdef HAVE_CUDA + ,TNL::Matrices::SparseMatrix< int, TNL::Devices::Cuda, int, TNL::Matrices::BinaryMatrix, TNL::Containers::Segments::CSR >, + TNL::Matrices::SparseMatrix< long, TNL::Devices::Cuda, int, TNL::Matrices::BinaryMatrix, TNL::Containers::Segments::CSR >, + TNL::Matrices::SparseMatrix< float, TNL::Devices::Cuda, int, TNL::Matrices::BinaryMatrix, TNL::Containers::Segments::CSR >, + TNL::Matrices::SparseMatrix< double, TNL::Devices::Cuda, int, TNL::Matrices::BinaryMatrix, TNL::Containers::Segments::CSR >, + TNL::Matrices::SparseMatrix< int, TNL::Devices::Cuda, long, TNL::Matrices::BinaryMatrix, TNL::Containers::Segments::CSR >, + TNL::Matrices::SparseMatrix< long, TNL::Devices::Cuda, long, TNL::Matrices::BinaryMatrix, TNL::Containers::Segments::CSR >, + TNL::Matrices::SparseMatrix< float, TNL::Devices::Cuda, long, TNL::Matrices::BinaryMatrix, TNL::Containers::Segments::CSR >, + TNL::Matrices::SparseMatrix< double, TNL::Devices::Cuda, long, TNL::Matrices::BinaryMatrix, TNL::Containers::Segments::CSR > +#endif +>; + +TYPED_TEST_SUITE( BinaryMatrixTest_CSR, CSRMatrixTypes); + +TYPED_TEST( BinaryMatrixTest_CSR, setDimensionsTest ) 
+{ + using CSRMatrixType = typename TestFixture::CSRMatrixType; + + test_SetDimensions< CSRMatrixType >(); +} + +TYPED_TEST( BinaryMatrixTest_CSR, setCompressedRowLengthsTest ) +{ + using CSRMatrixType = typename TestFixture::CSRMatrixType; + + test_SetCompressedRowLengths< CSRMatrixType >(); +} + +TYPED_TEST( BinaryMatrixTest_CSR, setLikeTest ) +{ + using CSRMatrixType = typename TestFixture::CSRMatrixType; + + test_SetLike< CSRMatrixType, CSRMatrixType >(); +} + +TYPED_TEST( BinaryMatrixTest_CSR, resetTest ) +{ + using CSRMatrixType = typename TestFixture::CSRMatrixType; + + test_Reset< CSRMatrixType >(); +} + +TYPED_TEST( BinaryMatrixTest_CSR, getRowTest ) +{ + using CSRMatrixType = typename TestFixture::CSRMatrixType; + + test_GetRow< CSRMatrixType >(); +} + + +TYPED_TEST( BinaryMatrixTest_CSR, setElementTest ) +{ + using CSRMatrixType = typename TestFixture::CSRMatrixType; + + test_SetElement< CSRMatrixType >(); +} + +TYPED_TEST( BinaryMatrixTest_CSR, vectorProductTest ) +{ + using CSRMatrixType = typename TestFixture::CSRMatrixType; + + test_VectorProduct< CSRMatrixType >(); +} + +TYPED_TEST( BinaryMatrixTest_CSR, rowsReduction ) +{ + using CSRMatrixType = typename TestFixture::CSRMatrixType; + + test_RowsReduction< CSRMatrixType >(); +} + +TYPED_TEST( BinaryMatrixTest_CSR, saveAndLoadTest ) +{ + using CSRMatrixType = typename TestFixture::CSRMatrixType; + + test_SaveAndLoad< CSRMatrixType >( "test_BinarySparseMatrixTest_CSR" ); +} + +TYPED_TEST( BinaryMatrixTest_CSR, printTest ) +{ + using CSRMatrixType = typename TestFixture::CSRMatrixType; + + test_Print< CSRMatrixType >(); +} + +#endif + +#include "../main.h" diff --git a/src/UnitTests/Matrices/BinarySparseMatrixTest_Ellpack.cpp b/src/UnitTests/Matrices/BinarySparseMatrixTest_Ellpack.cpp new file mode 100644 index 0000000000000000000000000000000000000000..b1d5d71cf681fda25c0b0dde127dcc20c795a039 --- /dev/null +++ b/src/UnitTests/Matrices/BinarySparseMatrixTest_Ellpack.cpp @@ -0,0 +1,11 @@ +/*************************************************************************** + BinarySparseMatrixTest_Ellpack.cpp - description + ------------------- + begin : Jan 30, 2020 + copyright : (C) 2020 by Tomas Oberhuber et al. + email : tomas.oberhuber@fjfi.cvut.cz + ***************************************************************************/ + +/* See Copyright Notice in tnl/Copyright */ + +#include "BinarySparseMatrixTest_Ellpack.h" \ No newline at end of file diff --git a/src/UnitTests/Matrices/BinarySparseMatrixTest_Ellpack.cu b/src/UnitTests/Matrices/BinarySparseMatrixTest_Ellpack.cu new file mode 100644 index 0000000000000000000000000000000000000000..8d075f1cf4539b741be6e1693abe8ddd28766f74 --- /dev/null +++ b/src/UnitTests/Matrices/BinarySparseMatrixTest_Ellpack.cu @@ -0,0 +1,11 @@ +/*************************************************************************** + BinarySparseMatrixTest_Ellpack.cu - description + ------------------- + begin : Jan 30, 2020 + copyright : (C) 2020 by Tomas Oberhuber et al. 
+ email : tomas.oberhuber@fjfi.cvut.cz + ***************************************************************************/ + +/* See Copyright Notice in tnl/Copyright */ + +#include "BinarySparseMatrixTest_Ellpack.h" \ No newline at end of file diff --git a/src/UnitTests/Matrices/BinarySparseMatrixTest_Ellpack.h b/src/UnitTests/Matrices/BinarySparseMatrixTest_Ellpack.h new file mode 100644 index 0000000000000000000000000000000000000000..3c0a65cfda5dbc98edc099fcb39a6334afebd0f3 --- /dev/null +++ b/src/UnitTests/Matrices/BinarySparseMatrixTest_Ellpack.h @@ -0,0 +1,138 @@ +/*************************************************************************** + BinarySparseMatrixTest_Ellpack.h - description + ------------------- + begin : Jan 30, 2020 + copyright : (C) 2020 by Tomas Oberhuber et al. + email : tomas.oberhuber@fjfi.cvut.cz + ***************************************************************************/ + +/* See Copyright Notice in tnl/Copyright */ + +#include <TNL/Containers/Segments/Ellpack.h> +#include <TNL/Matrices/SparseMatrix.h> + + +#include "BinarySparseMatrixTest.hpp" +#include <iostream> + +#ifdef HAVE_GTEST +#include <gtest/gtest.h> + +// test fixture for typed tests +template< typename Matrix > +class BinaryMatrixTest_Ellpack : public ::testing::Test +{ +protected: + using EllpackMatrixType = Matrix; +}; + +//// +// Row-major format is used for the host system +template< typename Device, typename Index, typename IndexAllocator > +using RowMajorEllpack = TNL::Containers::Segments::Ellpack< Device, Index, IndexAllocator, true, 32 >; + + +//// +// Column-major format is used for GPUs +template< typename Device, typename Index, typename IndexAllocator > +using ColumnMajorEllpack = TNL::Containers::Segments::Ellpack< Device, Index, IndexAllocator, false, 32 >; + +// types for which MatrixTest is instantiated +using EllpackMatrixTypes = ::testing::Types +< + TNL::Matrices::SparseMatrix< int, TNL::Devices::Host, int, TNL::Matrices::GeneralMatrix, RowMajorEllpack >, + TNL::Matrices::SparseMatrix< long, TNL::Devices::Host, int, TNL::Matrices::GeneralMatrix, RowMajorEllpack >, + TNL::Matrices::SparseMatrix< float, TNL::Devices::Host, int, TNL::Matrices::GeneralMatrix, RowMajorEllpack >, + TNL::Matrices::SparseMatrix< double, TNL::Devices::Host, int, TNL::Matrices::GeneralMatrix, RowMajorEllpack >, + TNL::Matrices::SparseMatrix< int, TNL::Devices::Host, long, TNL::Matrices::GeneralMatrix, RowMajorEllpack >, + TNL::Matrices::SparseMatrix< long, TNL::Devices::Host, long, TNL::Matrices::GeneralMatrix, RowMajorEllpack >, + TNL::Matrices::SparseMatrix< float, TNL::Devices::Host, long, TNL::Matrices::GeneralMatrix, RowMajorEllpack >, + TNL::Matrices::SparseMatrix< double, TNL::Devices::Host, long, TNL::Matrices::GeneralMatrix, RowMajorEllpack > +#ifdef HAVE_CUDA + ,TNL::Matrices::SparseMatrix< int, TNL::Devices::Cuda, int, TNL::Matrices::GeneralMatrix, ColumnMajorEllpack >, + TNL::Matrices::SparseMatrix< long, TNL::Devices::Cuda, int, TNL::Matrices::GeneralMatrix, ColumnMajorEllpack >, + TNL::Matrices::SparseMatrix< float, TNL::Devices::Cuda, int, TNL::Matrices::GeneralMatrix, ColumnMajorEllpack >, + TNL::Matrices::SparseMatrix< double, TNL::Devices::Cuda, int, TNL::Matrices::GeneralMatrix, ColumnMajorEllpack >, + TNL::Matrices::SparseMatrix< int, TNL::Devices::Cuda, long, TNL::Matrices::GeneralMatrix, ColumnMajorEllpack >, + TNL::Matrices::SparseMatrix< long, TNL::Devices::Cuda, long, TNL::Matrices::GeneralMatrix, ColumnMajorEllpack >, + TNL::Matrices::SparseMatrix< float, TNL::Devices::Cuda, long, 
TNL::Matrices::GeneralMatrix, ColumnMajorEllpack >, + TNL::Matrices::SparseMatrix< double, TNL::Devices::Cuda, long, TNL::Matrices::GeneralMatrix, ColumnMajorEllpack > +#endif +>; + +TYPED_TEST_SUITE( BinaryMatrixTest_Ellpack, EllpackMatrixTypes); + +TYPED_TEST( BinaryMatrixTest_Ellpack, setDimensionsTest ) +{ + using EllpackMatrixType = typename TestFixture::EllpackMatrixType; + + test_SetDimensions< EllpackMatrixType >(); +} + +TYPED_TEST( BinaryMatrixTest_Ellpack, setCompressedRowLengthsTest ) +{ + using EllpackMatrixType = typename TestFixture::EllpackMatrixType; + + test_SetCompressedRowLengths< EllpackMatrixType >(); +} + +TYPED_TEST( BinaryMatrixTest_Ellpack, setLikeTest ) +{ + using EllpackMatrixType = typename TestFixture::EllpackMatrixType; + + test_SetLike< EllpackMatrixType, EllpackMatrixType >(); +} + +TYPED_TEST( BinaryMatrixTest_Ellpack, resetTest ) +{ + using EllpackMatrixType = typename TestFixture::EllpackMatrixType; + + test_Reset< EllpackMatrixType >(); +} + +TYPED_TEST( BinaryMatrixTest_Ellpack, getRowTest ) +{ + using EllpackMatrixType = typename TestFixture::EllpackMatrixType; + + test_GetRow< EllpackMatrixType >(); +} + + +TYPED_TEST( BinaryMatrixTest_Ellpack, setElementTest ) +{ + using EllpackMatrixType = typename TestFixture::EllpackMatrixType; + + test_SetElement< EllpackMatrixType >(); +} + +TYPED_TEST( BinaryMatrixTest_Ellpack, vectorProductTest ) +{ + using EllpackMatrixType = typename TestFixture::EllpackMatrixType; + + test_VectorProduct< EllpackMatrixType >(); +} + +TYPED_TEST( BinaryMatrixTest_Ellpack, rowsReduction ) +{ + using EllpackMatrixType = typename TestFixture::EllpackMatrixType; + + test_RowsReduction< EllpackMatrixType >(); +} + +TYPED_TEST( BinaryMatrixTest_Ellpack, saveAndLoadTest ) +{ + using EllpackMatrixType = typename TestFixture::EllpackMatrixType; + + test_SaveAndLoad< EllpackMatrixType >( "test_BinarySparseMatrixTest_Ellpack" ); +} + +TYPED_TEST( BinaryMatrixTest_Ellpack, printTest ) +{ + using EllpackMatrixType = typename TestFixture::EllpackMatrixType; + + test_Print< EllpackMatrixType >(); +} + +#endif + +#include "../main.h" diff --git a/src/UnitTests/Matrices/BinarySparseMatrixTest_SlicedEllpack.cpp b/src/UnitTests/Matrices/BinarySparseMatrixTest_SlicedEllpack.cpp new file mode 100644 index 0000000000000000000000000000000000000000..7046d815629444d3275eaae4b44d429f0ea8c3f7 --- /dev/null +++ b/src/UnitTests/Matrices/BinarySparseMatrixTest_SlicedEllpack.cpp @@ -0,0 +1,11 @@ +/*************************************************************************** + BinarySparseMatrixTest_SlicedEllpack.cpp - description + ------------------- + begin : Jan 30, 2020 + copyright : (C) 2020 by Tomas Oberhuber et al. + email : tomas.oberhuber@fjfi.cvut.cz + ***************************************************************************/ + +/* See Copyright Notice in tnl/Copyright */ + +#include "BinarySparseMatrixTest_SlicedEllpack.h" \ No newline at end of file diff --git a/src/UnitTests/Matrices/BinarySparseMatrixTest_SlicedEllpack.cu b/src/UnitTests/Matrices/BinarySparseMatrixTest_SlicedEllpack.cu new file mode 100644 index 0000000000000000000000000000000000000000..bb6829310fc45f5679bf658a9a792b72dbb0b6b9 --- /dev/null +++ b/src/UnitTests/Matrices/BinarySparseMatrixTest_SlicedEllpack.cu @@ -0,0 +1,11 @@ +/*************************************************************************** + BinarySparseMatrixTest_SlicedEllpack.cu - description + ------------------- + begin : Jan 30, 2020 + copyright : (C) 2020 by Tomas Oberhuber et al. 
+ email : tomas.oberhuber@fjfi.cvut.cz + ***************************************************************************/ + +/* See Copyright Notice in tnl/Copyright */ + +#include "BinarySparseMatrixTest_SlicedEllpack.h" \ No newline at end of file diff --git a/src/UnitTests/Matrices/BinarySparseMatrixTest_SlicedEllpack.h b/src/UnitTests/Matrices/BinarySparseMatrixTest_SlicedEllpack.h new file mode 100644 index 0000000000000000000000000000000000000000..98c5f65ae63fcec30b91d1cbc2efd67e5586e32a --- /dev/null +++ b/src/UnitTests/Matrices/BinarySparseMatrixTest_SlicedEllpack.h @@ -0,0 +1,138 @@ +/*************************************************************************** + BinarySparseMatrixTest_SlicedEllpack.h - description + ------------------- + begin : Jan 30, 2020 + copyright : (C) 2020 by Tomas Oberhuber et al. + email : tomas.oberhuber@fjfi.cvut.cz + ***************************************************************************/ + +/* See Copyright Notice in tnl/Copyright */ + +#include <TNL/Containers/Segments/SlicedEllpack.h> +#include <TNL/Matrices/SparseMatrix.h> + + +#include "BinarySparseMatrixTest.hpp" +#include <iostream> + +#ifdef HAVE_GTEST +#include <gtest/gtest.h> + +// test fixture for typed tests +template< typename Matrix > +class BinaryMatrixTest_SlicedEllpack : public ::testing::Test +{ +protected: + using SlicedEllpackMatrixType = Matrix; +}; + +//// +// Row-major format is used for the host system +template< typename Device, typename Index, typename IndexAllocator > +using RowMajorSlicedEllpack = TNL::Containers::Segments::SlicedEllpack< Device, Index, IndexAllocator, true, 32 >; + + +//// +// Column-major format is used for GPUs +template< typename Device, typename Index, typename IndexAllocator > +using ColumnMajorSlicedEllpack = TNL::Containers::Segments::SlicedEllpack< Device, Index, IndexAllocator, false, 32 >; + +// types for which MatrixTest is instantiated +using SlicedEllpackMatrixTypes = ::testing::Types +< + TNL::Matrices::SparseMatrix< int, TNL::Devices::Host, int, TNL::Matrices::GeneralMatrix, RowMajorSlicedEllpack >, + TNL::Matrices::SparseMatrix< long, TNL::Devices::Host, int, TNL::Matrices::GeneralMatrix, RowMajorSlicedEllpack >, + TNL::Matrices::SparseMatrix< float, TNL::Devices::Host, int, TNL::Matrices::GeneralMatrix, RowMajorSlicedEllpack >, + TNL::Matrices::SparseMatrix< double, TNL::Devices::Host, int, TNL::Matrices::GeneralMatrix, RowMajorSlicedEllpack >, + TNL::Matrices::SparseMatrix< int, TNL::Devices::Host, long, TNL::Matrices::GeneralMatrix, RowMajorSlicedEllpack >, + TNL::Matrices::SparseMatrix< long, TNL::Devices::Host, long, TNL::Matrices::GeneralMatrix, RowMajorSlicedEllpack >, + TNL::Matrices::SparseMatrix< float, TNL::Devices::Host, long, TNL::Matrices::GeneralMatrix, RowMajorSlicedEllpack >, + TNL::Matrices::SparseMatrix< double, TNL::Devices::Host, long, TNL::Matrices::GeneralMatrix, RowMajorSlicedEllpack > +#ifdef HAVE_CUDA + ,TNL::Matrices::SparseMatrix< int, TNL::Devices::Cuda, int, TNL::Matrices::GeneralMatrix, ColumnMajorSlicedEllpack >, + TNL::Matrices::SparseMatrix< long, TNL::Devices::Cuda, int, TNL::Matrices::GeneralMatrix, ColumnMajorSlicedEllpack >, + TNL::Matrices::SparseMatrix< float, TNL::Devices::Cuda, int, TNL::Matrices::GeneralMatrix, ColumnMajorSlicedEllpack >, + TNL::Matrices::SparseMatrix< double, TNL::Devices::Cuda, int, TNL::Matrices::GeneralMatrix, ColumnMajorSlicedEllpack >, + TNL::Matrices::SparseMatrix< int, TNL::Devices::Cuda, long, TNL::Matrices::GeneralMatrix, ColumnMajorSlicedEllpack >, + 
TNL::Matrices::SparseMatrix< long, TNL::Devices::Cuda, long, TNL::Matrices::GeneralMatrix, ColumnMajorSlicedEllpack >, + TNL::Matrices::SparseMatrix< float, TNL::Devices::Cuda, long, TNL::Matrices::GeneralMatrix, ColumnMajorSlicedEllpack >, + TNL::Matrices::SparseMatrix< double, TNL::Devices::Cuda, long, TNL::Matrices::GeneralMatrix, ColumnMajorSlicedEllpack > +#endif +>; + +TYPED_TEST_SUITE( BinaryMatrixTest_SlicedEllpack, SlicedEllpackMatrixTypes); + +TYPED_TEST( BinaryMatrixTest_SlicedEllpack, setDimensionsTest ) +{ + using SlicedEllpackMatrixType = typename TestFixture::SlicedEllpackMatrixType; + + test_SetDimensions< SlicedEllpackMatrixType >(); +} + +TYPED_TEST( BinaryMatrixTest_SlicedEllpack, setCompressedRowLengthsTest ) +{ + using SlicedEllpackMatrixType = typename TestFixture::SlicedEllpackMatrixType; + + test_SetCompressedRowLengths< SlicedEllpackMatrixType >(); +} + +TYPED_TEST( BinaryMatrixTest_SlicedEllpack, setLikeTest ) +{ + using SlicedEllpackMatrixType = typename TestFixture::SlicedEllpackMatrixType; + + test_SetLike< SlicedEllpackMatrixType, SlicedEllpackMatrixType >(); +} + +TYPED_TEST( BinaryMatrixTest_SlicedEllpack, resetTest ) +{ + using SlicedEllpackMatrixType = typename TestFixture::SlicedEllpackMatrixType; + + test_Reset< SlicedEllpackMatrixType >(); +} + +TYPED_TEST( BinaryMatrixTest_SlicedEllpack, getRowTest ) +{ + using SlicedEllpackMatrixType = typename TestFixture::SlicedEllpackMatrixType; + + test_GetRow< SlicedEllpackMatrixType >(); +} + + +TYPED_TEST( BinaryMatrixTest_SlicedEllpack, setElementTest ) +{ + using SlicedEllpackMatrixType = typename TestFixture::SlicedEllpackMatrixType; + + test_SetElement< SlicedEllpackMatrixType >(); +} + +TYPED_TEST( BinaryMatrixTest_SlicedEllpack, vectorProductTest ) +{ + using SlicedEllpackMatrixType = typename TestFixture::SlicedEllpackMatrixType; + + test_VectorProduct< SlicedEllpackMatrixType >(); +} + +TYPED_TEST( BinaryMatrixTest_SlicedEllpack, rowsReduction ) +{ + using SlicedEllpackMatrixType = typename TestFixture::SlicedEllpackMatrixType; + + test_RowsReduction< SlicedEllpackMatrixType >(); +} + +TYPED_TEST( BinaryMatrixTest_SlicedEllpack, saveAndLoadTest ) +{ + using SlicedEllpackMatrixType = typename TestFixture::SlicedEllpackMatrixType; + + test_SaveAndLoad< SlicedEllpackMatrixType >( "test_BinarySparseMatrixTest" ); +} + +TYPED_TEST( BinaryMatrixTest_SlicedEllpack, printTest ) +{ + using SlicedEllpackMatrixType = typename TestFixture::SlicedEllpackMatrixType; + + test_Print< SlicedEllpackMatrixType >(); +} + +#endif + +#include "../main.h" diff --git a/src/UnitTests/Matrices/CMakeLists.txt b/src/UnitTests/Matrices/CMakeLists.txt index 2a08be2198e1dcbff5de4ccacccae38e2f52f17b..1c536a98210b59789d2a7b34a9b9935150a7e0ac 100644 --- a/src/UnitTests/Matrices/CMakeLists.txt +++ b/src/UnitTests/Matrices/CMakeLists.txt @@ -1,18 +1,17 @@ -IF( BUILD_CUDA ) - CUDA_ADD_EXECUTABLE( SparseMatrixCopyTest SparseMatrixCopyTest.cu OPTIONS ${CXX_TESTS_FLAGS} ) - TARGET_LINK_LIBRARIES( SparseMatrixCopyTest ${GTEST_BOTH_LIBRARIES} ) +ADD_SUBDIRECTORY( Legacy ) - CUDA_ADD_EXECUTABLE( SparseMatrixTest SparseMatrixTest.cu OPTIONS ${CXX_TESTS_FLAGS} ) - TARGET_LINK_LIBRARIES( SparseMatrixTest ${GTEST_BOTH_LIBRARIES} ) +IF( BUILD_CUDA ) + CUDA_ADD_EXECUTABLE( DenseMatrixTest DenseMatrixTest.cu OPTIONS ${CXX_TESTS_FLAGS} ) + TARGET_LINK_LIBRARIES( DenseMatrixTest ${GTEST_BOTH_LIBRARIES} ) - CUDA_ADD_EXECUTABLE( SparseMatrixTest_AdEllpack SparseMatrixTest_AdEllpack.cu OPTIONS ${CXX_TESTS_FLAGS} ) - TARGET_LINK_LIBRARIES( 
SparseMatrixTest_AdEllpack ${GTEST_BOTH_LIBRARIES} ) + CUDA_ADD_EXECUTABLE( DenseMatrixCopyTest DenseMatrixCopyTest.cu OPTIONS ${CXX_TESTS_FLAGS} ) + TARGET_LINK_LIBRARIES( DenseMatrixCopyTest ${GTEST_BOTH_LIBRARIES} ) - CUDA_ADD_EXECUTABLE( SparseMatrixTest_BiEllpack SparseMatrixTest_BiEllpack.cu OPTIONS ${CXX_TESTS_FLAGS} ) - TARGET_LINK_LIBRARIES( SparseMatrixTest_BiEllpack ${GTEST_BOTH_LIBRARIES} ) + CUDA_ADD_EXECUTABLE( TridiagonalMatrixTest TridiagonalMatrixTest.cu OPTIONS ${CXX_TESTS_FLAGS} ) + TARGET_LINK_LIBRARIES( TridiagonalMatrixTest ${GTEST_BOTH_LIBRARIES} ) - CUDA_ADD_EXECUTABLE( SparseMatrixTest_ChunkedEllpack SparseMatrixTest_ChunkedEllpack.cu OPTIONS ${CXX_TESTS_FLAGS} ) - TARGET_LINK_LIBRARIES( SparseMatrixTest_ChunkedEllpack ${GTEST_BOTH_LIBRARIES} ) + CUDA_ADD_EXECUTABLE( MultidiagonalMatrixTest MultidiagonalMatrixTest.cu OPTIONS ${CXX_TESTS_FLAGS} ) + TARGET_LINK_LIBRARIES( MultidiagonalMatrixTest ${GTEST_BOTH_LIBRARIES} ) CUDA_ADD_EXECUTABLE( SparseMatrixTest_CSR SparseMatrixTest_CSR.cu OPTIONS ${CXX_TESTS_FLAGS} ) TARGET_LINK_LIBRARIES( SparseMatrixTest_CSR ${GTEST_BOTH_LIBRARIES} ) @@ -23,28 +22,40 @@ IF( BUILD_CUDA ) CUDA_ADD_EXECUTABLE( SparseMatrixTest_SlicedEllpack SparseMatrixTest_SlicedEllpack.cu OPTIONS ${CXX_TESTS_FLAGS} ) TARGET_LINK_LIBRARIES( SparseMatrixTest_SlicedEllpack ${GTEST_BOTH_LIBRARIES} ) - CUDA_ADD_EXECUTABLE( DenseMatrixTest DenseMatrixTest.cu OPTIONS ${CXX_TESTS_FLAGS} ) - TARGET_LINK_LIBRARIES( DenseMatrixTest ${GTEST_BOTH_LIBRARIES} ) -ELSE( BUILD_CUDA ) - ADD_EXECUTABLE( SparseMatrixCopyTest SparseMatrixCopyTest.cpp ) - TARGET_COMPILE_OPTIONS( SparseMatrixCopyTest PRIVATE ${CXX_TESTS_FLAGS} ) + CUDA_ADD_EXECUTABLE( SparseMatrixCopyTest SparseMatrixCopyTest.cu OPTIONS ${CXX_TESTS_FLAGS} ) TARGET_LINK_LIBRARIES( SparseMatrixCopyTest ${GTEST_BOTH_LIBRARIES} ) - ADD_EXECUTABLE( SparseMatrixTest SparseMatrixTest.cpp ) - TARGET_COMPILE_OPTIONS( SparseMatrixTest PRIVATE ${CXX_TESTS_FLAGS} ) - TARGET_LINK_LIBRARIES( SparseMatrixTest ${GTEST_BOTH_LIBRARIES} ) + CUDA_ADD_EXECUTABLE( BinarySparseMatrixTest_CSR BinarySparseMatrixTest_CSR.cu OPTIONS ${CXX_TESTS_FLAGS} ) + TARGET_LINK_LIBRARIES( BinarySparseMatrixTest_CSR ${GTEST_BOTH_LIBRARIES} ) + + CUDA_ADD_EXECUTABLE( BinarySparseMatrixTest_Ellpack BinarySparseMatrixTest_Ellpack.cu OPTIONS ${CXX_TESTS_FLAGS} ) + TARGET_LINK_LIBRARIES( BinarySparseMatrixTest_Ellpack ${GTEST_BOTH_LIBRARIES} ) + + CUDA_ADD_EXECUTABLE( BinarySparseMatrixTest_SlicedEllpack BinarySparseMatrixTest_SlicedEllpack.cu OPTIONS ${CXX_TESTS_FLAGS} ) + TARGET_LINK_LIBRARIES( BinarySparseMatrixTest_SlicedEllpack ${GTEST_BOTH_LIBRARIES} ) + + CUDA_ADD_EXECUTABLE( BinarySparseMatrixCopyTest BinarySparseMatrixCopyTest.cu OPTIONS ${CXX_TESTS_FLAGS} ) + TARGET_LINK_LIBRARIES( BinarySparseMatrixCopyTest ${GTEST_BOTH_LIBRARIES} ) + + CUDA_ADD_EXECUTABLE( SymmetricSparseMatrixTest_CSR SymmetricSparseMatrixTest_CSR.cu OPTIONS ${CXX_TESTS_FLAGS} ) + TARGET_LINK_LIBRARIES( SymmetricSparseMatrixTest_CSR ${GTEST_BOTH_LIBRARIES} ) + +ELSE( BUILD_CUDA ) + ADD_EXECUTABLE( DenseMatrixTest DenseMatrixTest.cpp ) + TARGET_COMPILE_OPTIONS( DenseMatrixTest PRIVATE ${CXX_TESTS_FLAGS} ) + TARGET_LINK_LIBRARIES( DenseMatrixTest ${GTEST_BOTH_LIBRARIES} ) - ADD_EXECUTABLE( SparseMatrixTest_AdEllpack SparseMatrixTest_AdEllpack.cpp ) - TARGET_COMPILE_OPTIONS( SparseMatrixTest_AdEllpack PRIVATE ${CXX_TESTS_FLAGS} ) - TARGET_LINK_LIBRARIES( SparseMatrixTest_AdEllpack ${GTEST_BOTH_LIBRARIES} ) + ADD_EXECUTABLE( DenseMatrixCopyTest DenseMatrixCopyTest.cpp ) + 
TARGET_COMPILE_OPTIONS( DenseMatrixCopyTest PRIVATE ${CXX_TESTS_FLAGS} ) + TARGET_LINK_LIBRARIES( DenseMatrixCopyTest ${GTEST_BOTH_LIBRARIES} ) - ADD_EXECUTABLE( SparseMatrixTest_BiEllpack SparseMatrixTest_BiEllpack.cpp ) - TARGET_COMPILE_OPTIONS( SparseMatrixTest_BiEllpack PRIVATE ${CXX_TESTS_FLAGS} ) - TARGET_LINK_LIBRARIES( SparseMatrixTest_BiEllpack ${GTEST_BOTH_LIBRARIES} ) + ADD_EXECUTABLE( TridiagonalMatrixTest TridiagonalMatrixTest.cpp ) + TARGET_COMPILE_OPTIONS( TridiagonalMatrixTest PRIVATE ${CXX_TESTS_FLAGS} ) + TARGET_LINK_LIBRARIES( TridiagonalMatrixTest ${GTEST_BOTH_LIBRARIES} ) - ADD_EXECUTABLE( SparseMatrixTest_ChunkedEllpack SparseMatrixTest_ChunkedEllpack.cpp ) - TARGET_COMPILE_OPTIONS( SparseMatrixTest_ChunkedEllpack PRIVATE ${CXX_TESTS_FLAGS} ) - TARGET_LINK_LIBRARIES( SparseMatrixTest_ChunkedEllpack ${GTEST_BOTH_LIBRARIES} ) + ADD_EXECUTABLE( MultidiagonalMatrixTest MultidiagonalMatrixTest.cpp ) + TARGET_COMPILE_OPTIONS( MultidiagonalMatrixTest PRIVATE ${CXX_TESTS_FLAGS} ) + TARGET_LINK_LIBRARIES( MultidiagonalMatrixTest ${GTEST_BOTH_LIBRARIES} ) ADD_EXECUTABLE( SparseMatrixTest_CSR SparseMatrixTest_CSR.cpp ) TARGET_COMPILE_OPTIONS( SparseMatrixTest_CSR PRIVATE ${CXX_TESTS_FLAGS} ) @@ -58,24 +69,45 @@ ELSE( BUILD_CUDA ) TARGET_COMPILE_OPTIONS( SparseMatrixTest_SlicedEllpack PRIVATE ${CXX_TESTS_FLAGS} ) TARGET_LINK_LIBRARIES( SparseMatrixTest_SlicedEllpack ${GTEST_BOTH_LIBRARIES} ) - ADD_EXECUTABLE( DenseMatrixTest DenseMatrixTest.cpp ) - TARGET_COMPILE_OPTIONS( DenseMatrixTest PRIVATE ${CXX_TESTS_FLAGS} ) - TARGET_LINK_LIBRARIES( DenseMatrixTest ${GTEST_BOTH_LIBRARIES} ) + ADD_EXECUTABLE( SparseMatrixCopyTest SparseMatrixCopyTest.cpp ) + TARGET_COMPILE_OPTIONS( SparseMatrixCopyTest PRIVATE ${CXX_TESTS_FLAGS} ) + TARGET_LINK_LIBRARIES( SparseMatrixCopyTest ${GTEST_BOTH_LIBRARIES} ) + + ADD_EXECUTABLE( BinarySparseMatrixTest_CSR BinarySparseMatrixTest_CSR.cpp ) + TARGET_COMPILE_OPTIONS( BinarySparseMatrixTest_CSR PRIVATE ${CXX_TESTS_FLAGS} ) + TARGET_LINK_LIBRARIES( BinarySparseMatrixTest_CSR ${GTEST_BOTH_LIBRARIES} ) + + ADD_EXECUTABLE( BinarySparseMatrixTest_Ellpack BinarySparseMatrixTest_Ellpack.cpp ) + TARGET_COMPILE_OPTIONS( BinarySparseMatrixTest_Ellpack PRIVATE ${CXX_TESTS_FLAGS} ) + TARGET_LINK_LIBRARIES( BinarySparseMatrixTest_Ellpack ${GTEST_BOTH_LIBRARIES} ) + + ADD_EXECUTABLE( BinarySparseMatrixTest_SlicedEllpack BinarySparseMatrixTest_SlicedEllpack.cpp ) + TARGET_COMPILE_OPTIONS( BinarySparseMatrixTest_SlicedEllpack PRIVATE ${CXX_TESTS_FLAGS} ) + TARGET_LINK_LIBRARIES( BinarySparseMatrixTest_SlicedEllpack ${GTEST_BOTH_LIBRARIES} ) + + ADD_EXECUTABLE( BinarySparseMatrixCopyTest BinarySparseMatrixCopyTest.cpp ) + TARGET_COMPILE_OPTIONS( BinarySparseMatrixCopyTest PRIVATE ${CXX_TESTS_FLAGS} ) + TARGET_LINK_LIBRARIES( BinarySparseMatrixCopyTest ${GTEST_BOTH_LIBRARIES} ) + + ADD_EXECUTABLE( SymmetricSparseMatrixTest_CSR SymmetricSparseMatrixTest_CSR.cpp ) + TARGET_COMPILE_OPTIONS( SymmetricSparseMatrixTest_CSR PRIVATE ${CXX_TESTS_FLAGS} ) + TARGET_LINK_LIBRARIES( SymmetricSparseMatrixTest_CSR ${GTEST_BOTH_LIBRARIES} ) + ENDIF( BUILD_CUDA ) +ADD_TEST( DenseMatrixTest ${EXECUTABLE_OUTPUT_PATH}/DenseMatrixTest${CMAKE_EXECUTABLE_SUFFIX} ) +ADD_TEST( TridiagonalMatrixTest ${EXECUTABLE_OUTPUT_PATH}/TridiagonalMatrixTest${CMAKE_EXECUTABLE_SUFFIX} ) +ADD_TEST( MultidiagonalMatrixTest ${EXECUTABLE_OUTPUT_PATH}/MultidiagonalMatrixTest${CMAKE_EXECUTABLE_SUFFIX} ) -ADD_TEST( SparseMatrixCopyTest ${EXECUTABLE_OUTPUT_PATH}/SparseMatrixCopyTest${CMAKE_EXECUTABLE_SUFFIX} ) 
-ADD_TEST( SparseMatrixTest ${EXECUTABLE_OUTPUT_PATH}/SparseMatrixTest${CMAKE_EXECUTABLE_SUFFIX} ) -# TODO: Uncomment the following when AdEllpack works -#ADD_TEST( SparseMatrixTest_AdEllpack ${EXECUTABLE_OUTPUT_PATH}/SparseMatrixTest_AdEllpack${CMAKE_EXECUTABLE_SUFFIX} ) -ADD_TEST( SparseMatrixTest_BiEllpack ${EXECUTABLE_OUTPUT_PATH}/SparseMatrixTest_BiEllpack${CMAKE_EXECUTABLE_SUFFIX} ) -ADD_TEST( SparseMatrixTest_ChunkedEllpack ${EXECUTABLE_OUTPUT_PATH}/SparseMatrixTest_ChunkedEllpack${CMAKE_EXECUTABLE_SUFFIX} ) ADD_TEST( SparseMatrixTest_CSR ${EXECUTABLE_OUTPUT_PATH}/SparseMatrixTest_CSR${CMAKE_EXECUTABLE_SUFFIX} ) ADD_TEST( SparseMatrixTest_Ellpack ${EXECUTABLE_OUTPUT_PATH}/SparseMatrixTest_Ellpack${CMAKE_EXECUTABLE_SUFFIX} ) ADD_TEST( SparseMatrixTest_SlicedEllpack ${EXECUTABLE_OUTPUT_PATH}/SparseMatrixTest_SlicedEllpack${CMAKE_EXECUTABLE_SUFFIX} ) -# TODO: DenseMatrixTest is not finished -#ADD_TEST( DenseMatrixTest ${EXECUTABLE_OUTPUT_PATH}/DenseMatrixTest${CMAKE_EXECUTABLE_SUFFIX} ) - +ADD_TEST( SparseMatrixCopyTest ${EXECUTABLE_OUTPUT_PATH}/SparseMatrixCopyTest${CMAKE_EXECUTABLE_SUFFIX} ) +ADD_TEST( BinarySparseMatrixTest_CSR ${EXECUTABLE_OUTPUT_PATH}/BinarySparseMatrixTest_CSR${CMAKE_EXECUTABLE_SUFFIX} ) +ADD_TEST( BinarySparseMatrixTest_Ellpack ${EXECUTABLE_OUTPUT_PATH}/BinarySparseMatrixTest_Ellpack${CMAKE_EXECUTABLE_SUFFIX} ) +ADD_TEST( BinarySparseMatrixTest_SlicedEllpack ${EXECUTABLE_OUTPUT_PATH}/BinarySparseMatrixTest_SlicedEllpack${CMAKE_EXECUTABLE_SUFFIX} ) +ADD_TEST( BinarySparseMatrixCopyTest ${EXECUTABLE_OUTPUT_PATH}/BinarySparseMatrixCopyTest${CMAKE_EXECUTABLE_SUFFIX} ) +ADD_TEST( SymmetricSparseMatrixTest_CSR ${EXECUTABLE_OUTPUT_PATH}/SymmetricSparseMatrixTest_CSR${CMAKE_EXECUTABLE_SUFFIX} ) if( ${BUILD_MPI} ) if( BUILD_CUDA ) diff --git a/src/UnitTests/Matrices/DenseMatrixCopyTest.cpp b/src/UnitTests/Matrices/DenseMatrixCopyTest.cpp new file mode 100644 index 0000000000000000000000000000000000000000..5cdd413af60773cddf070493509b0be861c97018 --- /dev/null +++ b/src/UnitTests/Matrices/DenseMatrixCopyTest.cpp @@ -0,0 +1,11 @@ +/*************************************************************************** + DenseMatrixCopyTest.cpp - description + ------------------- + begin : Jan 19, 2020 + copyright : (C) 2020 by Tomas Oberhuber et al. + email : tomas.oberhuber@fjfi.cvut.cz + ***************************************************************************/ + +/* See Copyright Notice in tnl/Copyright */ + +#include "DenseMatrixCopyTest.h" diff --git a/src/UnitTests/Matrices/DenseMatrixCopyTest.cu b/src/UnitTests/Matrices/DenseMatrixCopyTest.cu new file mode 100644 index 0000000000000000000000000000000000000000..5fbd77efa119a0bde37f52d0aeecbccb1581262b --- /dev/null +++ b/src/UnitTests/Matrices/DenseMatrixCopyTest.cu @@ -0,0 +1,11 @@ +/*************************************************************************** + DenseMatrixCopyTest.cu - description + ------------------- + begin : Jan 19, 2020 + copyright : (C) 2020 by Tomas Oberhuber et al. 
+ email : tomas.oberhuber@fjfi.cvut.cz + ***************************************************************************/ + +/* See Copyright Notice in tnl/Copyright */ + +#include "DenseMatrixCopyTest.h" diff --git a/src/UnitTests/Matrices/DenseMatrixCopyTest.h b/src/UnitTests/Matrices/DenseMatrixCopyTest.h new file mode 100644 index 0000000000000000000000000000000000000000..3ef31f1075beb311374ad0a45e4a4aff7d2641eb --- /dev/null +++ b/src/UnitTests/Matrices/DenseMatrixCopyTest.h @@ -0,0 +1,657 @@ +/*************************************************************************** + DenseMatrixCopyTest.h - description + ------------------- + begin : Jan 19, 2020 + copyright : (C) 2020 by Tomas Oberhuber et al. + email : tomas.oberhuber@fjfi.cvut.cz + ***************************************************************************/ + +/* See Copyright Notice in tnl/Copyright */ + +#include <TNL/Matrices/Legacy/CSR.h> +#include <TNL/Matrices/Legacy/Ellpack.h> +#include <TNL/Matrices/Legacy/SlicedEllpack.h> + +#include <TNL/Matrices/SparseMatrix.h> +#include <TNL/Matrices/MatrixType.h> +#include <TNL/Matrices/Dense.h> +#include <TNL/Matrices/Tridiagonal.h> +#include <TNL/Matrices/Multidiagonal.h> +#include <TNL/Containers/Segments/CSR.h> +#include <TNL/Containers/Segments/Ellpack.h> +#include <TNL/Containers/Segments/SlicedEllpack.h> + +template< typename Device, typename Index, typename IndexAllocator > +using EllpackSegments = TNL::Containers::Segments::Ellpack< Device, Index, IndexAllocator >; + +template< typename Device, typename Index, typename IndexAllocator > +using SlicedEllpackSegments = TNL::Containers::Segments::SlicedEllpack< Device, Index, IndexAllocator >; + +using CSR_host = TNL::Matrices::SparseMatrix< int, TNL::Devices::Host, int, TNL::Matrices::GeneralMatrix, TNL::Containers::Segments::CSR >; +using CSR_cuda = TNL::Matrices::SparseMatrix< int, TNL::Devices::Cuda, int, TNL::Matrices::GeneralMatrix, TNL::Containers::Segments::CSR >; +using E_host = TNL::Matrices::SparseMatrix< int, TNL::Devices::Host, int, TNL::Matrices::GeneralMatrix, EllpackSegments >; +using E_cuda = TNL::Matrices::SparseMatrix< int, TNL::Devices::Cuda, int, TNL::Matrices::GeneralMatrix, EllpackSegments >; +using SE_host = TNL::Matrices::SparseMatrix< int, TNL::Devices::Host, int, TNL::Matrices::GeneralMatrix, SlicedEllpackSegments >; +using SE_cuda = TNL::Matrices::SparseMatrix< int, TNL::Devices::Cuda, int, TNL::Matrices::GeneralMatrix, SlicedEllpackSegments >; +using Dense_host = TNL::Matrices::Dense< int, TNL::Devices::Host, int, false >; +using Dense_host_RowMajorOrder = TNL::Matrices::Dense< int, TNL::Devices::Host, int, true >; +using Dense_cuda = TNL::Matrices::Dense< int, TNL::Devices::Cuda, int, false >; +using Dense_cuda_RowMajorOrder = TNL::Matrices::Dense< int, TNL::Devices::Cuda, int, true >; + + +#ifdef HAVE_GTEST +#include <gtest/gtest.h> + +/* + * Sets up the following 10x6 sparse matrix: + * + * / 1 2 \ + * | 3 4 5 | + * | 6 7 8 | + * | 9 10 11 12 13 | + * | 14 15 16 17 18 | + * | 19 20 | + * | 21 | + * | 22 | + * | 23 24 25 26 27 | + * \ 28 / + */ +template< typename Matrix > +void setupUnevenRowSizeMatrix( Matrix& m ) +{ + const int rows = 10; + const int cols = 6; + m.setDimensions( rows, cols ); + typename Matrix::CompressedRowLengthsVector rowLengths; + rowLengths.setSize( rows ); + rowLengths.setValue( 5 ); + rowLengths.setElement( 0, 2 ); + rowLengths.setElement( 1, 3 ); + rowLengths.setElement( 2, 3 ); + rowLengths.setElement( 5, 2 ); + rowLengths.setElement( 6, 1 ); + rowLengths.setElement( 7, 1 
); + rowLengths.setElement( 9, 1 ); + m.setCompressedRowLengths( rowLengths ); + + int value = 1; + for( int i = 0; i < cols - 4; i++ ) // 0th row + m.setElement( 0, i, value++ ); + + for( int i = 3; i < cols; i++ ) // 1st row + m.setElement( 1, i, value++ ); + + for( int i = 0; i < cols - 3; i++ ) // 2nd row + m.setElement( 2, i, value++ ); + + for( int i = 1; i < cols; i++ ) // 3rd row + m.setElement( 3, i, value++ ); + + for( int i = 0; i < cols - 1; i++ ) // 4th row + m.setElement( 4, i, value++ ); + + for( int i = 0; i < cols - 4; i++ ) // 5th row + m.setElement( 5, i, value++ ); + + m.setElement( 6, 0, value++ ); // 6th row + + m.setElement( 7, 0, value++ ); // 7th row + + for( int i = 0; i < cols - 1; i++ ) // 8th row + m.setElement( 8, i, value++ ); + + m.setElement( 9, 5, value++ ); // 9th row +} + +template< typename Matrix > +void checkUnevenRowSizeMatrix( Matrix& m ) +{ + ASSERT_EQ( m.getRows(), 10 ); + ASSERT_EQ( m.getColumns(), 6 ); + + EXPECT_EQ( m.getElement( 0, 0 ), 1 ); + EXPECT_EQ( m.getElement( 0, 1 ), 2 ); + EXPECT_EQ( m.getElement( 0, 2 ), 0 ); + EXPECT_EQ( m.getElement( 0, 3 ), 0 ); + EXPECT_EQ( m.getElement( 0, 4 ), 0 ); + EXPECT_EQ( m.getElement( 0, 5 ), 0); + + EXPECT_EQ( m.getElement( 1, 0 ), 0 ); + EXPECT_EQ( m.getElement( 1, 1 ), 0 ); + EXPECT_EQ( m.getElement( 1, 2 ), 0 ); + EXPECT_EQ( m.getElement( 1, 3 ), 3 ); + EXPECT_EQ( m.getElement( 1, 4 ), 4 ); + EXPECT_EQ( m.getElement( 1, 5 ), 5 ); + + EXPECT_EQ( m.getElement( 2, 0 ), 6 ); + EXPECT_EQ( m.getElement( 2, 1 ), 7 ); + EXPECT_EQ( m.getElement( 2, 2 ), 8 ); + EXPECT_EQ( m.getElement( 2, 3 ), 0 ); + EXPECT_EQ( m.getElement( 2, 4 ), 0 ); + EXPECT_EQ( m.getElement( 2, 5 ), 0 ); + + EXPECT_EQ( m.getElement( 3, 0 ), 0 ); + EXPECT_EQ( m.getElement( 3, 1 ), 9 ); + EXPECT_EQ( m.getElement( 3, 2 ), 10 ); + EXPECT_EQ( m.getElement( 3, 3 ), 11 ); + EXPECT_EQ( m.getElement( 3, 4 ), 12 ); + EXPECT_EQ( m.getElement( 3, 5 ), 13 ); + + EXPECT_EQ( m.getElement( 4, 0 ), 14 ); + EXPECT_EQ( m.getElement( 4, 1 ), 15 ); + EXPECT_EQ( m.getElement( 4, 2 ), 16 ); + EXPECT_EQ( m.getElement( 4, 3 ), 17 ); + EXPECT_EQ( m.getElement( 4, 4 ), 18 ); + EXPECT_EQ( m.getElement( 4, 5 ), 0 ); + + EXPECT_EQ( m.getElement( 5, 0 ), 19 ); + EXPECT_EQ( m.getElement( 5, 1 ), 20 ); + EXPECT_EQ( m.getElement( 5, 2 ), 0 ); + EXPECT_EQ( m.getElement( 5, 3 ), 0 ); + EXPECT_EQ( m.getElement( 5, 4 ), 0 ); + EXPECT_EQ( m.getElement( 5, 5 ), 0 ); + + EXPECT_EQ( m.getElement( 6, 0 ), 21 ); + EXPECT_EQ( m.getElement( 6, 1 ), 0 ); + EXPECT_EQ( m.getElement( 6, 2 ), 0 ); + EXPECT_EQ( m.getElement( 6, 3 ), 0 ); + EXPECT_EQ( m.getElement( 6, 4 ), 0 ); + EXPECT_EQ( m.getElement( 6, 5 ), 0 ); + + EXPECT_EQ( m.getElement( 7, 0 ), 22 ); + EXPECT_EQ( m.getElement( 7, 1 ), 0 ); + EXPECT_EQ( m.getElement( 7, 2 ), 0 ); + EXPECT_EQ( m.getElement( 7, 3 ), 0 ); + EXPECT_EQ( m.getElement( 7, 4 ), 0 ); + EXPECT_EQ( m.getElement( 7, 5 ), 0 ); + + EXPECT_EQ( m.getElement( 8, 0 ), 23 ); + EXPECT_EQ( m.getElement( 8, 1 ), 24 ); + EXPECT_EQ( m.getElement( 8, 2 ), 25 ); + EXPECT_EQ( m.getElement( 8, 3 ), 26 ); + EXPECT_EQ( m.getElement( 8, 4 ), 27 ); + EXPECT_EQ( m.getElement( 8, 5 ), 0 ); + + EXPECT_EQ( m.getElement( 9, 0 ), 0 ); + EXPECT_EQ( m.getElement( 9, 1 ), 0 ); + EXPECT_EQ( m.getElement( 9, 2 ), 0 ); + EXPECT_EQ( m.getElement( 9, 3 ), 0 ); + EXPECT_EQ( m.getElement( 9, 4 ), 0 ); + EXPECT_EQ( m.getElement( 9, 5 ), 28 ); +} + +/* + * Sets up the following 7x6 sparse matrix: + * + * / 2 1 \ + * | 5 4 3 | + * | 8 7 6 | + * | 11 10 9 | + * | 14 13 12 | + * | 16 15 | + * \ 17 / 
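+ * The setup loop below writes the values 1 to 17 row by row from the top; within each row it fills the anti-diagonal entries from the rightmost column leftward.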
+ */ +template< typename Matrix > +void setupAntiTriDiagMatrix( Matrix& m ) +{ + const int rows = 7; + const int cols = 6; + m.setDimensions( rows, cols ); + typename Matrix::CompressedRowLengthsVector rowLengths; + rowLengths.setSize( rows ); + rowLengths.setValue( 3 ); + rowLengths.setElement( 0, 4); + rowLengths.setElement( 1, 4 ); + m.setCompressedRowLengths( rowLengths ); + + int value = 1; + for( int i = 0; i < rows; i++ ) + for( int j = cols - 1; j > 2; j-- ) + if( j - i + 1 < cols && j - i + 1 >= 0 ) + m.setElement( i, j - i + 1, value++ ); +} + +template< typename Matrix > +void checkAntiTriDiagMatrix( Matrix& m ) +{ + ASSERT_EQ( m.getRows(), 7 ); + ASSERT_EQ( m.getColumns(), 6 ); + + EXPECT_EQ( m.getElement( 0, 0 ), 0 ); + EXPECT_EQ( m.getElement( 0, 1 ), 0 ); + EXPECT_EQ( m.getElement( 0, 2 ), 0 ); + EXPECT_EQ( m.getElement( 0, 3 ), 0 ); + EXPECT_EQ( m.getElement( 0, 4 ), 2 ); + EXPECT_EQ( m.getElement( 0, 5 ), 1); + + EXPECT_EQ( m.getElement( 1, 0 ), 0 ); + EXPECT_EQ( m.getElement( 1, 1 ), 0 ); + EXPECT_EQ( m.getElement( 1, 2 ), 0 ); + EXPECT_EQ( m.getElement( 1, 3 ), 5 ); + EXPECT_EQ( m.getElement( 1, 4 ), 4 ); + EXPECT_EQ( m.getElement( 1, 5 ), 3 ); + + EXPECT_EQ( m.getElement( 2, 0 ), 0 ); + EXPECT_EQ( m.getElement( 2, 1 ), 0 ); + EXPECT_EQ( m.getElement( 2, 2 ), 8 ); + EXPECT_EQ( m.getElement( 2, 3 ), 7 ); + EXPECT_EQ( m.getElement( 2, 4 ), 6 ); + EXPECT_EQ( m.getElement( 2, 5 ), 0 ); + + EXPECT_EQ( m.getElement( 3, 0 ), 0 ); + EXPECT_EQ( m.getElement( 3, 1 ), 11 ); + EXPECT_EQ( m.getElement( 3, 2 ), 10 ); + EXPECT_EQ( m.getElement( 3, 3 ), 9 ); + EXPECT_EQ( m.getElement( 3, 4 ), 0 ); + EXPECT_EQ( m.getElement( 3, 5 ), 0 ); + + EXPECT_EQ( m.getElement( 4, 0 ), 14 ); + EXPECT_EQ( m.getElement( 4, 1 ), 13 ); + EXPECT_EQ( m.getElement( 4, 2 ), 12 ); + EXPECT_EQ( m.getElement( 4, 3 ), 0 ); + EXPECT_EQ( m.getElement( 4, 4 ), 0 ); + EXPECT_EQ( m.getElement( 4, 5 ), 0 ); + + EXPECT_EQ( m.getElement( 5, 0 ), 16 ); + EXPECT_EQ( m.getElement( 5, 1 ), 15 ); + EXPECT_EQ( m.getElement( 5, 2 ), 0 ); + EXPECT_EQ( m.getElement( 5, 3 ), 0 ); + EXPECT_EQ( m.getElement( 5, 4 ), 0 ); + EXPECT_EQ( m.getElement( 5, 5 ), 0 ); + + EXPECT_EQ( m.getElement( 6, 0 ), 17 ); + EXPECT_EQ( m.getElement( 6, 1 ), 0 ); + EXPECT_EQ( m.getElement( 6, 2 ), 0 ); + EXPECT_EQ( m.getElement( 6, 3 ), 0 ); + EXPECT_EQ( m.getElement( 6, 4 ), 0 ); + EXPECT_EQ( m.getElement( 6, 5 ), 0 ); +} + +/* + * Sets up the following 7x6 sparse matrix: + * + * / 1 2 \ + * | 3 4 5 | + * | 6 7 8 | + * | 9 10 11 | + * | 12 13 14 | + * | 15 16 | + * \ 17 / + */ +template< typename Matrix > +void setupTriDiagMatrix( Matrix& m ) +{ + const int rows = 7; + const int cols = 6; + m.setDimensions( rows, cols ); + typename Matrix::CompressedRowLengthsVector rowLengths; + rowLengths.setSize( rows ); + rowLengths.setValue( 3 ); + rowLengths.setElement( 0 , 4 ); + rowLengths.setElement( 1, 4 ); + m.setCompressedRowLengths( rowLengths ); + + + int value = 1; + for( int i = 0; i < rows; i++ ) + for( int j = 0; j < 3; j++ ) + if( i + j - 1 >= 0 && i + j - 1 < cols ) + m.setElement( i, i + j - 1, value++ ); +} + +template< typename Matrix > +void checkTriDiagMatrix( Matrix& m ) +{ + ASSERT_EQ( m.getRows(), 7 ); + ASSERT_EQ( m.getColumns(), 6 ); + + EXPECT_EQ( m.getElement( 0, 0 ), 1 ); + EXPECT_EQ( m.getElement( 0, 1 ), 2 ); + EXPECT_EQ( m.getElement( 0, 2 ), 0 ); + EXPECT_EQ( m.getElement( 0, 3 ), 0 ); + EXPECT_EQ( m.getElement( 0, 4 ), 0 ); + EXPECT_EQ( m.getElement( 0, 5 ), 0 ); + + EXPECT_EQ( m.getElement( 1, 0 ), 3 ); + EXPECT_EQ( m.getElement( 
1, 1 ), 4 ); + EXPECT_EQ( m.getElement( 1, 2 ), 5 ); + EXPECT_EQ( m.getElement( 1, 3 ), 0 ); + EXPECT_EQ( m.getElement( 1, 4 ), 0 ); + EXPECT_EQ( m.getElement( 1, 5 ), 0 ); + + EXPECT_EQ( m.getElement( 2, 0 ), 0 ); + EXPECT_EQ( m.getElement( 2, 1 ), 6 ); + EXPECT_EQ( m.getElement( 2, 2 ), 7 ); + EXPECT_EQ( m.getElement( 2, 3 ), 8 ); + EXPECT_EQ( m.getElement( 2, 4 ), 0 ); + EXPECT_EQ( m.getElement( 2, 5 ), 0 ); + + EXPECT_EQ( m.getElement( 3, 0 ), 0 ); + EXPECT_EQ( m.getElement( 3, 1 ), 0 ); + EXPECT_EQ( m.getElement( 3, 2 ), 9 ); + EXPECT_EQ( m.getElement( 3, 3 ), 10 ); + EXPECT_EQ( m.getElement( 3, 4 ), 11 ); + EXPECT_EQ( m.getElement( 3, 5 ), 0 ); + + EXPECT_EQ( m.getElement( 4, 0 ), 0 ); + EXPECT_EQ( m.getElement( 4, 1 ), 0 ); + EXPECT_EQ( m.getElement( 4, 2 ), 0 ); + EXPECT_EQ( m.getElement( 4, 3 ), 12 ); + EXPECT_EQ( m.getElement( 4, 4 ), 13 ); + EXPECT_EQ( m.getElement( 4, 5 ), 14 ); + + EXPECT_EQ( m.getElement( 5, 0 ), 0 ); + EXPECT_EQ( m.getElement( 5, 1 ), 0 ); + EXPECT_EQ( m.getElement( 5, 2 ), 0 ); + EXPECT_EQ( m.getElement( 5, 3 ), 0 ); + EXPECT_EQ( m.getElement( 5, 4 ), 15 ); + EXPECT_EQ( m.getElement( 5, 5 ), 16 ); + + EXPECT_EQ( m.getElement( 6, 0 ), 0 ); + EXPECT_EQ( m.getElement( 6, 1 ), 0 ); + EXPECT_EQ( m.getElement( 6, 2 ), 0 ); + EXPECT_EQ( m.getElement( 6, 3 ), 0 ); + EXPECT_EQ( m.getElement( 6, 4 ), 0 ); + EXPECT_EQ( m.getElement( 6, 5 ), 17 ); +} + +template< typename Matrix1, typename Matrix2 > +void testCopyAssignment() +{ + { + SCOPED_TRACE("Tri Diagonal Matrix"); + + Matrix1 triDiag1; + setupTriDiagMatrix( triDiag1 ); + checkTriDiagMatrix( triDiag1 ); + + Matrix2 triDiag2; + triDiag2 = triDiag1; + checkTriDiagMatrix( triDiag1 ); + checkTriDiagMatrix( triDiag2 ); + } + { + SCOPED_TRACE("Anti Tri Diagonal Matrix"); + Matrix1 antiTriDiag1; + setupAntiTriDiagMatrix( antiTriDiag1 ); + checkAntiTriDiagMatrix( antiTriDiag1 ); + + Matrix2 antiTriDiag2; + antiTriDiag2 = antiTriDiag1; + checkAntiTriDiagMatrix( antiTriDiag2 ); + } + { + SCOPED_TRACE("Uneven Row Size Matrix"); + Matrix1 unevenRowSize1; + setupUnevenRowSizeMatrix( unevenRowSize1 ); + checkUnevenRowSizeMatrix( unevenRowSize1 ); + + Matrix2 unevenRowSize2; + unevenRowSize2 = unevenRowSize1; + + checkUnevenRowSizeMatrix( unevenRowSize2 ); + } +} + +template< typename Matrix > +void tridiagonalMatrixAssignment() +{ + using RealType = typename Matrix::RealType; + using DeviceType = typename Matrix::DeviceType; + using IndexType = typename Matrix::IndexType; + + using TridiagonalHost = TNL::Matrices::Tridiagonal< RealType, TNL::Devices::Host, IndexType >; + using TridiagonalCuda = TNL::Matrices::Tridiagonal< RealType, TNL::Devices::Cuda, IndexType >; + + const IndexType rows( 10 ), columns( 10 ); + TridiagonalHost hostMatrix( rows, columns ); + for( IndexType i = 0; i < rows; i++ ) + for( IndexType j = TNL::max( 0, i - 1 ); j < TNL::min( columns, i + 2 ); j++ ) + hostMatrix.setElement( i, j, i + j ); + + Matrix matrix; + matrix = hostMatrix; + using RowCapacitiesType = TNL::Containers::Vector< IndexType, DeviceType, IndexType >; + RowCapacitiesType rowCapacities; + matrix.getCompressedRowLengths( rowCapacities ); + RowCapacitiesType exactRowLengths{ 1, 3, 3, 3, 3, 3, 3, 3, 3, 2 }; + EXPECT_EQ( rowCapacities, exactRowLengths ); + for( IndexType i = 0; i < rows; i++ ) + for( IndexType j = 0; j < columns; j++ ) + { + if( abs( i - j ) > 1 ) + EXPECT_EQ( matrix.getElement( i, j ), 0.0 ); + else + EXPECT_EQ( matrix.getElement( i, j ), i + j ); + } + +#ifdef HAVE_CUDA + TridiagonalCuda cudaMatrix( rows, columns ); + 
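   // Expected capacities: the tridiagonal source stores i + j on its diagonals,
   // so row 0 contributes only the single nonzero at ( 0, 1 ) -- the value at
   // ( 0, 0 ) is 0 -- row 9 contributes two nonzeros and every other row three,
   // which is exactly the { 1, 3, 3, 3, 3, 3, 3, 3, 3, 2 } vector checked above.
   // The CUDA branch repeats the same checks after copying hostMatrix into
   // cudaMatrix and assigning that to the dense matrix, i.e. after a
   // host -> CUDA -> dense round trip.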
cudaMatrix = hostMatrix; + matrix = cudaMatrix; + matrix.getCompressedRowLengths( rowCapacities ); + EXPECT_EQ( rowCapacities, exactRowLengths ); + for( IndexType i = 0; i < rows; i++ ) + for( IndexType j = 0; j < columns; j++ ) + { + if( abs( i - j ) > 1 ) + EXPECT_EQ( matrix.getElement( i, j ), 0.0 ); + else + EXPECT_EQ( matrix.getElement( i, j ), i + j ); + } +#endif +} + +template< typename Matrix > +void multidiagonalMatrixAssignment() +{ + using RealType = typename Matrix::RealType; + using DeviceType = typename Matrix::DeviceType; + using IndexType = typename Matrix::IndexType; + + using MultidiagonalHost = TNL::Matrices::Multidiagonal< RealType, TNL::Devices::Host, IndexType >; + using MultidiagonalCuda = TNL::Matrices::Multidiagonal< RealType, TNL::Devices::Cuda, IndexType >; + using DiagonalsShiftsType = typename MultidiagonalHost::DiagonalsShiftsType; + DiagonalsShiftsType diagonals{ -4, -2, 0, 1, 3, 5 }; + + const IndexType rows( 10 ), columns( 10 ); + MultidiagonalHost hostMatrix( rows, columns, diagonals ); + for( IndexType i = 0; i < rows; i++ ) + for( IndexType j = 0; j < columns; j++ ) + if( diagonals.containsValue( j - i ) ) + hostMatrix.setElement( i, j, i + j ); + + Matrix matrix; + matrix = hostMatrix; + using RowCapacitiesType = TNL::Containers::Vector< IndexType, DeviceType, IndexType >; + RowCapacitiesType rowCapacities; + matrix.getCompressedRowLengths( rowCapacities ); + RowCapacitiesType exactRowLengths{ 3, 4, 5, 5, 6, 5, 5, 4, 4, 3 }; + EXPECT_EQ( rowCapacities, exactRowLengths ); + for( IndexType i = 0; i < rows; i++ ) + for( IndexType j = 0; j < columns; j++ ) + { + if( diagonals.containsValue( j - i ) ) + EXPECT_EQ( matrix.getElement( i, j ), i + j ); + else + EXPECT_EQ( matrix.getElement( i, j ), 0.0 ); + } + +#ifdef HAVE_CUDA + MultidiagonalCuda cudaMatrix( rows, columns, diagonals ); + cudaMatrix = hostMatrix; + matrix = cudaMatrix; + matrix.getCompressedRowLengths( rowCapacities ); + EXPECT_EQ( rowCapacities, exactRowLengths ); + for( IndexType i = 0; i < rows; i++ ) + for( IndexType j = 0; j < columns; j++ ) + { + if( diagonals.containsValue( j - i ) ) + EXPECT_EQ( matrix.getElement( i, j ), i + j ); + else + EXPECT_EQ( matrix.getElement( i, j ), 0.0 ); + } +#endif +} + +template< typename Matrix > +void denseMatrixAssignment() +{ + using RealType = typename Matrix::RealType; + using DeviceType = typename Matrix::DeviceType; + using IndexType = typename Matrix::IndexType; + + using DenseHost = TNL::Matrices::Dense< RealType, TNL::Devices::Host, IndexType >; + using DenseCuda = TNL::Matrices::Dense< RealType, TNL::Devices::Cuda, IndexType >; + + const IndexType rows( 10 ), columns( 10 ); + DenseHost hostMatrix( rows, columns ); + for( IndexType i = 0; i < columns; i++ ) + for( IndexType j = 0; j <= i; j++ ) + hostMatrix( i, j ) = i + j; + + Matrix matrix; + matrix = hostMatrix; + using RowCapacitiesType = TNL::Containers::Vector< IndexType, DeviceType, IndexType >; + RowCapacitiesType rowCapacities; + matrix.getCompressedRowLengths( rowCapacities ); + RowCapacitiesType exactRowLengths{ 0, 2, 3, 4, 5, 6, 7, 8, 9, 10 }; + EXPECT_EQ( rowCapacities, exactRowLengths ); + for( IndexType i = 0; i < columns; i++ ) + for( IndexType j = 0; j < rows; j++ ) + { + if( j > i ) + EXPECT_EQ( matrix.getElement( i, j ), 0.0 ); + else + EXPECT_EQ( matrix.getElement( i, j ), i + j ); + } + +#ifdef HAVE_CUDA + DenseCuda cudaMatrix( rows, columns ); + cudaMatrix = hostMatrix; + matrix = cudaMatrix; + matrix.getCompressedRowLengths( rowCapacities ); + EXPECT_EQ( rowCapacities, 
exactRowLengths ); + for( IndexType i = 0; i < columns; i++ ) + for( IndexType j = 0; j < rows; j++ ) + { + if( j > i ) + EXPECT_EQ( matrix.getElement( i, j ), 0.0 ); + else + EXPECT_EQ( matrix.getElement( i, j ), i + j ); + } +#endif +} + +TEST( DenseMatrixCopyTest, Dense_HostToDense_Host ) +{ + testCopyAssignment< Dense_host, Dense_host >(); + testCopyAssignment< Dense_host_RowMajorOrder, Dense_host >(); + testCopyAssignment< Dense_host, Dense_host_RowMajorOrder >(); + testCopyAssignment< Dense_host_RowMajorOrder, Dense_host_RowMajorOrder >(); +} + +#ifdef HAVE_CUDA +TEST( DenseMatrixCopyTest, Dense_HostToDense_Cuda ) +{ + testCopyAssignment< Dense_host, Dense_cuda >(); + testCopyAssignment< Dense_host_RowMajorOrder, Dense_cuda >(); + testCopyAssignment< Dense_host, Dense_cuda_RowMajorOrder >(); + testCopyAssignment< Dense_host_RowMajorOrder, Dense_cuda_RowMajorOrder >(); +} + +TEST( DenseMatrixCopyTest, Dense_CudaToDense_Host ) +{ + testCopyAssignment< Dense_cuda, Dense_host >(); + testCopyAssignment< Dense_cuda_RowMajorOrder, Dense_host >(); + testCopyAssignment< Dense_cuda, Dense_host_RowMajorOrder >(); + testCopyAssignment< Dense_cuda_RowMajorOrder, Dense_host_RowMajorOrder >(); +} + +TEST( DenseMatrixCopyTest, Dense_CudaToDense_Cuda ) +{ + testCopyAssignment< Dense_cuda, Dense_cuda >(); + testCopyAssignment< Dense_cuda_RowMajorOrder, Dense_cuda >(); + testCopyAssignment< Dense_cuda, Dense_cuda_RowMajorOrder >(); + testCopyAssignment< Dense_cuda_RowMajorOrder, Dense_cuda_RowMajorOrder >(); +} +#endif // HAVE_CUDA + + +TEST( DenseMatrixCopyTest, CSR_HostToDense_Host ) +{ + testCopyAssignment< CSR_host, Dense_host >(); + testCopyAssignment< CSR_host, Dense_host_RowMajorOrder >(); +} + +#ifdef HAVE_CUDA +TEST( DenseMatrixCopyTest, CSR_HostToDense_cuda ) +{ + testCopyAssignment< CSR_host, Dense_cuda >(); + testCopyAssignment< CSR_host, Dense_cuda_RowMajorOrder >(); +} + +TEST( DenseMatrixCopyTest, CSR_CudaToDense_host ) +{ + testCopyAssignment< CSR_cuda, Dense_host >(); + testCopyAssignment< CSR_cuda, Dense_host_RowMajorOrder >(); +} + +TEST( DenseMatrixCopyTest, CSR_CudaToDense_cuda ) +{ + testCopyAssignment< CSR_cuda, Dense_cuda >(); + testCopyAssignment< CSR_cuda, Dense_cuda_RowMajorOrder >(); +} +#endif + +//// +// Tridiagonal matrix assignment test +TEST( DenseMatrixCopyTest, TridiagonalMatrixAssignment_to_Dense_host ) +{ + tridiagonalMatrixAssignment< Dense_host >(); + tridiagonalMatrixAssignment< Dense_host_RowMajorOrder >(); +} + +#ifdef HAVE_CUDA +TEST( DenseMatrixCopyTest, TridiagonalMatrixAssignment_to_Dense_cuda ) +{ + tridiagonalMatrixAssignment< Dense_cuda >(); + tridiagonalMatrixAssignment< Dense_cuda_RowMajorOrder >(); +} +#endif // HAVE_CUDA + +//// +// Multidiagonal matrix assignment test +TEST( DenseMatrixCopyTest, MultidiagonalMatrixAssignment_to_Dense_host ) +{ + multidiagonalMatrixAssignment< Dense_host >(); + multidiagonalMatrixAssignment< Dense_host_RowMajorOrder >(); +} + +#ifdef HAVE_CUDA +TEST( DenseMatrixCopyTest, MultidiagonalMatrixAssignment_to_Dense_cuda ) +{ + multidiagonalMatrixAssignment< Dense_cuda >(); + multidiagonalMatrixAssignment< Dense_cuda_RowMajorOrder >(); +} +#endif // HAVE_CUDA + +//// +// Dense matrix assignment test +TEST( DenseMatrixCopyTest, DenseMatrixAssignment_to_Dense_host ) +{ + denseMatrixAssignment< Dense_host >(); + denseMatrixAssignment< Dense_host_RowMajorOrder >(); +} + +#ifdef HAVE_CUDA +TEST( DenseMatrixCopyTest, DenseMatrixAssignment_to_Dense_cuda ) +{ + denseMatrixAssignment< Dense_cuda >(); + denseMatrixAssignment< 
Dense_cuda_RowMajorOrder >(); +} +#endif // HAVE_CUDA + +#endif //HAVE_GTEST + +#include "../main.h" diff --git a/src/UnitTests/Matrices/DenseMatrixTest.h b/src/UnitTests/Matrices/DenseMatrixTest.h index 8d9e9c727a0d88c40b90f0623d8b2ec8808e3f95..37ae58bf1a1e7e8b03220c4916ba79cf48729ef9 100644 --- a/src/UnitTests/Matrices/DenseMatrixTest.h +++ b/src/UnitTests/Matrices/DenseMatrixTest.h @@ -26,29 +26,21 @@ using Dense_cuda_int = TNL::Matrices::Dense< int, TNL::Devices::Cuda, int >; static const char* TEST_FILE_NAME = "test_DenseMatrixTest.tnl"; -#ifdef HAVE_GTEST +#ifdef HAVE_GTEST #include <type_traits> #include <gtest/gtest.h> -template< typename MatrixHostFloat, typename MatrixHostInt > -void host_test_GetType() +void test_GetSerializationType() { - MatrixHostFloat mtrxHostFloat; - MatrixHostInt mtrxHostInt; - - EXPECT_EQ( mtrxHostFloat.getType(), TNL::String( "Matrices::Dense< float, Devices::Host, int >" ) ); - EXPECT_EQ( mtrxHostInt.getType(), TNL::String( "Matrices::Dense< int, Devices::Host, int >" ) ); -} - -template< typename MatrixCudaFloat, typename MatrixCudaInt > -void cuda_test_GetType() -{ - MatrixCudaFloat mtrxCudaFloat; - MatrixCudaInt mtrxCudaInt; - - EXPECT_EQ( mtrxCudaFloat.getType(), TNL::String( "Matrices::Dense< float, Devices::Cuda, int >" ) ); - EXPECT_EQ( mtrxCudaInt.getType(), TNL::String( "Matrices::Dense< int, Devices::Cuda, int >" ) ); + EXPECT_EQ( ( TNL::Matrices::Dense< float, TNL::Devices::Host, int, true >::getSerializationType() ), TNL::String( "Matrices::Dense< float, [any_device], int, true, [any_allocator] >" ) ); + EXPECT_EQ( ( TNL::Matrices::Dense< int, TNL::Devices::Host, int, true >::getSerializationType() ), TNL::String( "Matrices::Dense< int, [any_device], int, true, [any_allocator] >" ) ); + EXPECT_EQ( ( TNL::Matrices::Dense< float, TNL::Devices::Cuda, int, true >::getSerializationType() ), TNL::String( "Matrices::Dense< float, [any_device], int, true, [any_allocator] >" ) ); + EXPECT_EQ( ( TNL::Matrices::Dense< int, TNL::Devices::Cuda, int, true >::getSerializationType() ), TNL::String( "Matrices::Dense< int, [any_device], int, true, [any_allocator] >" ) ); + EXPECT_EQ( ( TNL::Matrices::Dense< float, TNL::Devices::Host, int, false >::getSerializationType() ), TNL::String( "Matrices::Dense< float, [any_device], int, false, [any_allocator] >" ) ); + EXPECT_EQ( ( TNL::Matrices::Dense< int, TNL::Devices::Host, int, false >::getSerializationType() ), TNL::String( "Matrices::Dense< int, [any_device], int, false, [any_allocator] >" ) ); + EXPECT_EQ( ( TNL::Matrices::Dense< float, TNL::Devices::Cuda, int, false >::getSerializationType() ), TNL::String( "Matrices::Dense< float, [any_device], int, false, [any_allocator] >" ) ); + EXPECT_EQ( ( TNL::Matrices::Dense< int, TNL::Devices::Cuda, int, false >::getSerializationType() ), TNL::String( "Matrices::Dense< int, [any_device], int, false, [any_allocator] >" ) ); } template< typename Matrix > @@ -57,13 +49,13 @@ void test_SetDimensions() using RealType = typename Matrix::RealType; using DeviceType = typename Matrix::DeviceType; using IndexType = typename Matrix::IndexType; - + const IndexType rows = 9; const IndexType cols = 8; - + Matrix m; m.setDimensions( rows, cols ); - + EXPECT_EQ( m.getRows(), 9 ); EXPECT_EQ( m.getColumns(), 8 ); } @@ -74,38 +66,90 @@ void test_SetLike() using RealType = typename Matrix1::RealType; using DeviceType = typename Matrix1::DeviceType; using IndexType = typename Matrix1::IndexType; - + const IndexType rows = 8; const IndexType cols = 7; - + Matrix1 m1; m1.reset(); 
m1.setDimensions( rows + 1, cols + 2 ); - + Matrix2 m2; m2.reset(); m2.setDimensions( rows, cols ); - + m1.setLike( m2 ); - + EXPECT_EQ( m1.getRows(), m2.getRows() ); EXPECT_EQ( m1.getColumns(), m2.getColumns() ); } +template< typename Matrix > +void test_GetCompressedRowLengths() +{ + using RealType = typename Matrix::RealType; + using DeviceType = typename Matrix::DeviceType; + using IndexType = typename Matrix::IndexType; + + const IndexType rows = 10; + const IndexType cols = 11; + + Matrix m( rows, cols ); + + // Insert values into the rows. + RealType value = 1; + + for( IndexType i = 0; i < 3; i++ ) // 0th row + m.setElement( 0, i, value++ ); + + for( IndexType i = 0; i < 3; i++ ) // 1st row + m.setElement( 1, i, value++ ); + + for( IndexType i = 0; i < 1; i++ ) // 2nd row + m.setElement( 2, i, value++ ); + + for( IndexType i = 0; i < 2; i++ ) // 3rd row + m.setElement( 3, i, value++ ); + + for( IndexType i = 0; i < 3; i++ ) // 4th row + m.setElement( 4, i, value++ ); + + for( IndexType i = 0; i < 4; i++ ) // 5th row + m.setElement( 5, i, value++ ); + + for( IndexType i = 0; i < 5; i++ ) // 6th row + m.setElement( 6, i, value++ ); + + for( IndexType i = 0; i < 6; i++ ) // 7th row + m.setElement( 7, i, value++ ); + + for( IndexType i = 0; i < 7; i++ ) // 8th row + m.setElement( 8, i, value++ ); + + for( IndexType i = 0; i < 8; i++ ) // 9th row + m.setElement( 9, i, value++ ); + + typename Matrix::CompressedRowLengthsVector rowLengths; + rowLengths = 0; + m.getCompressedRowLengths( rowLengths ); + typename Matrix::CompressedRowLengthsVector correctRowLengths{ 3, 3, 1, 2, 3, 4, 5, 6, 7, 8 }; + EXPECT_EQ( rowLengths, correctRowLengths ); +} + template< typename Matrix > void test_GetRowLength() { using RealType = typename Matrix::RealType; using DeviceType = typename Matrix::DeviceType; using IndexType = typename Matrix::IndexType; - + const IndexType rows = 8; const IndexType cols = 7; - + Matrix m; m.reset(); m.setDimensions( rows, cols ); - + EXPECT_EQ( m.getRowLength( 0 ), 7 ); EXPECT_EQ( m.getRowLength( 1 ), 7 ); EXPECT_EQ( m.getRowLength( 2 ), 7 ); @@ -122,14 +166,14 @@ void test_GetNumberOfMatrixElements() using RealType = typename Matrix::RealType; using DeviceType = typename Matrix::DeviceType; using IndexType = typename Matrix::IndexType; - + const IndexType rows = 7; const IndexType cols = 6; - + Matrix m; m.reset(); m.setDimensions( rows, cols ); - + EXPECT_EQ( m.getNumberOfMatrixElements(), 42 ); } @@ -139,7 +183,7 @@ void test_GetNumberOfNonzeroMatrixElements() using RealType = typename Matrix::RealType; using DeviceType = typename Matrix::DeviceType; using IndexType = typename Matrix::IndexType; - + /* * Sets up the following 7x6 dense matrix: * @@ -153,19 +197,19 @@ void test_GetNumberOfNonzeroMatrixElements() */ const IndexType rows = 7; const IndexType cols = 6; - + Matrix m; m.reset(); m.setDimensions( rows, cols ); - + RealType value = 1; for( IndexType i = 0; i < rows; i++ ) for( IndexType j = 0; j < cols; j++ ) m.setElement( i, j, value++ ); - + m.setElement( 0, 0, 0); // Set the first element of the diagonal to 0. m.setElement( 6, 5, 0); // Set the last element of the diagonal to 0. 
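   // All 7 x 6 = 42 elements were filled with nonzero values above; two of them
   // have just been overwritten with zeros, so 40 nonzero elements remain.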
- + EXPECT_EQ( m.getNumberOfNonzeroMatrixElements(), 40 ); } @@ -175,7 +219,7 @@ void test_Reset() using RealType = typename Matrix::RealType; using DeviceType = typename Matrix::DeviceType; using IndexType = typename Matrix::IndexType; - + /* * Sets up the following 5x4 dense matrix: * @@ -187,12 +231,12 @@ void test_Reset() */ const IndexType rows = 5; const IndexType cols = 4; - + Matrix m; m.setDimensions( rows, cols ); - + m.reset(); - + EXPECT_EQ( m.getRows(), 0 ); EXPECT_EQ( m.getColumns(), 0 ); } @@ -202,7 +246,7 @@ void test_SetValue() { using RealType = typename Matrix::RealType; using DeviceType = typename Matrix::DeviceType; - using IndexType = typename Matrix::IndexType; + using IndexType = typename Matrix::IndexType; /* * Sets up the following 7x6 dense matrix: * @@ -216,110 +260,110 @@ void test_SetValue() */ const IndexType rows = 7; const IndexType cols = 6; - + Matrix m; m.reset(); m.setDimensions( rows, cols ); - + RealType value = 1; for( IndexType i = 0; i < rows; i++ ) for( IndexType j = 0; j < cols; j++ ) m.setElement( i, j, value++ ); - + EXPECT_EQ( m.getElement( 0, 0 ), 1 ); EXPECT_EQ( m.getElement( 0, 1 ), 2 ); EXPECT_EQ( m.getElement( 0, 2 ), 3 ); EXPECT_EQ( m.getElement( 0, 3 ), 4 ); EXPECT_EQ( m.getElement( 0, 4 ), 5 ); EXPECT_EQ( m.getElement( 0, 5 ), 6 ); - + EXPECT_EQ( m.getElement( 1, 0 ), 7 ); EXPECT_EQ( m.getElement( 1, 1 ), 8 ); EXPECT_EQ( m.getElement( 1, 2 ), 9 ); EXPECT_EQ( m.getElement( 1, 3 ), 10 ); EXPECT_EQ( m.getElement( 1, 4 ), 11 ); EXPECT_EQ( m.getElement( 1, 5 ), 12 ); - + EXPECT_EQ( m.getElement( 2, 0 ), 13 ); EXPECT_EQ( m.getElement( 2, 1 ), 14 ); EXPECT_EQ( m.getElement( 2, 2 ), 15 ); EXPECT_EQ( m.getElement( 2, 3 ), 16 ); EXPECT_EQ( m.getElement( 2, 4 ), 17 ); EXPECT_EQ( m.getElement( 2, 5 ), 18 ); - + EXPECT_EQ( m.getElement( 3, 0 ), 19 ); EXPECT_EQ( m.getElement( 3, 1 ), 20 ); EXPECT_EQ( m.getElement( 3, 2 ), 21 ); EXPECT_EQ( m.getElement( 3, 3 ), 22 ); EXPECT_EQ( m.getElement( 3, 4 ), 23 ); EXPECT_EQ( m.getElement( 3, 5 ), 24 ); - + EXPECT_EQ( m.getElement( 4, 0 ), 25 ); EXPECT_EQ( m.getElement( 4, 1 ), 26 ); EXPECT_EQ( m.getElement( 4, 2 ), 27 ); EXPECT_EQ( m.getElement( 4, 3 ), 28 ); EXPECT_EQ( m.getElement( 4, 4 ), 29 ); EXPECT_EQ( m.getElement( 4, 5 ), 30 ); - + EXPECT_EQ( m.getElement( 5, 0 ), 31 ); EXPECT_EQ( m.getElement( 5, 1 ), 32 ); EXPECT_EQ( m.getElement( 5, 2 ), 33 ); EXPECT_EQ( m.getElement( 5, 3 ), 34 ); EXPECT_EQ( m.getElement( 5, 4 ), 35 ); EXPECT_EQ( m.getElement( 5, 5 ), 36 ); - + EXPECT_EQ( m.getElement( 6, 0 ), 37 ); EXPECT_EQ( m.getElement( 6, 1 ), 38 ); EXPECT_EQ( m.getElement( 6, 2 ), 39 ); EXPECT_EQ( m.getElement( 6, 3 ), 40 ); EXPECT_EQ( m.getElement( 6, 4 ), 41 ); EXPECT_EQ( m.getElement( 6, 5 ), 42 ); - + // Set the values of all elements to a certain number m.setValue( 42 ); - + EXPECT_EQ( m.getElement( 0, 0 ), 42 ); EXPECT_EQ( m.getElement( 0, 1 ), 42 ); EXPECT_EQ( m.getElement( 0, 2 ), 42 ); EXPECT_EQ( m.getElement( 0, 3 ), 42 ); EXPECT_EQ( m.getElement( 0, 4 ), 42 ); EXPECT_EQ( m.getElement( 0, 5 ), 42 ); - + EXPECT_EQ( m.getElement( 1, 0 ), 42 ); EXPECT_EQ( m.getElement( 1, 1 ), 42 ); EXPECT_EQ( m.getElement( 1, 2 ), 42 ); EXPECT_EQ( m.getElement( 1, 3 ), 42 ); EXPECT_EQ( m.getElement( 1, 4 ), 42 ); EXPECT_EQ( m.getElement( 1, 5 ), 42 ); - + EXPECT_EQ( m.getElement( 2, 0 ), 42 ); EXPECT_EQ( m.getElement( 2, 1 ), 42 ); EXPECT_EQ( m.getElement( 2, 2 ), 42 ); EXPECT_EQ( m.getElement( 2, 3 ), 42 ); EXPECT_EQ( m.getElement( 2, 4 ), 42 ); EXPECT_EQ( m.getElement( 2, 5 ), 42 ); - + EXPECT_EQ( m.getElement( 3, 
0 ), 42 ); EXPECT_EQ( m.getElement( 3, 1 ), 42 ); EXPECT_EQ( m.getElement( 3, 2 ), 42 ); EXPECT_EQ( m.getElement( 3, 3 ), 42 ); EXPECT_EQ( m.getElement( 3, 4 ), 42 ); EXPECT_EQ( m.getElement( 3, 5 ), 42 ); - + EXPECT_EQ( m.getElement( 4, 0 ), 42 ); EXPECT_EQ( m.getElement( 4, 1 ), 42 ); EXPECT_EQ( m.getElement( 4, 2 ), 42 ); EXPECT_EQ( m.getElement( 4, 3 ), 42 ); EXPECT_EQ( m.getElement( 4, 4 ), 42 ); EXPECT_EQ( m.getElement( 4, 5 ), 42 ); - + EXPECT_EQ( m.getElement( 5, 0 ), 42 ); EXPECT_EQ( m.getElement( 5, 1 ), 42 ); EXPECT_EQ( m.getElement( 5, 2 ), 42 ); EXPECT_EQ( m.getElement( 5, 3 ), 42 ); EXPECT_EQ( m.getElement( 5, 4 ), 42 ); EXPECT_EQ( m.getElement( 5, 5 ), 42 ); - + EXPECT_EQ( m.getElement( 6, 0 ), 42 ); EXPECT_EQ( m.getElement( 6, 1 ), 42 ); EXPECT_EQ( m.getElement( 6, 2 ), 42 ); @@ -345,40 +389,40 @@ void test_SetElement() */ const IndexType rows = 5; const IndexType cols = 5; - + Matrix m; m.reset(); - m.setDimensions( rows, cols ); - + m.setDimensions( rows, cols ); + RealType value = 1; for( IndexType i = 0; i < rows; i++ ) for( IndexType j = 0; j < cols; j++ ) m.setElement( i, j, value++ ); - + EXPECT_EQ( m.getElement( 0, 0 ), 1 ); EXPECT_EQ( m.getElement( 0, 1 ), 2 ); EXPECT_EQ( m.getElement( 0, 2 ), 3 ); EXPECT_EQ( m.getElement( 0, 3 ), 4 ); EXPECT_EQ( m.getElement( 0, 4 ), 5 ); - + EXPECT_EQ( m.getElement( 1, 0 ), 6 ); EXPECT_EQ( m.getElement( 1, 1 ), 7 ); EXPECT_EQ( m.getElement( 1, 2 ), 8 ); EXPECT_EQ( m.getElement( 1, 3 ), 9 ); EXPECT_EQ( m.getElement( 1, 4 ), 10 ); - + EXPECT_EQ( m.getElement( 2, 0 ), 11 ); EXPECT_EQ( m.getElement( 2, 1 ), 12 ); EXPECT_EQ( m.getElement( 2, 2 ), 13 ); EXPECT_EQ( m.getElement( 2, 3 ), 14 ); EXPECT_EQ( m.getElement( 2, 4 ), 15 ); - + EXPECT_EQ( m.getElement( 3, 0 ), 16 ); EXPECT_EQ( m.getElement( 3, 1 ), 17 ); EXPECT_EQ( m.getElement( 3, 2 ), 18 ); EXPECT_EQ( m.getElement( 3, 3 ), 19 ); EXPECT_EQ( m.getElement( 3, 4 ), 20 ); - + EXPECT_EQ( m.getElement( 4, 0 ), 21 ); EXPECT_EQ( m.getElement( 4, 1 ), 22 ); EXPECT_EQ( m.getElement( 4, 2 ), 23 ); @@ -404,53 +448,53 @@ void test_AddElement() */ const IndexType rows = 6; const IndexType cols = 5; - + Matrix m; m.reset(); m.setDimensions( rows, cols ); - + RealType value = 1; for( IndexType i = 0; i < rows; i++ ) for( IndexType j = 0; j < cols; j++ ) m.setElement( i, j, value++ ); - + // Check the added elements EXPECT_EQ( m.getElement( 0, 0 ), 1 ); EXPECT_EQ( m.getElement( 0, 1 ), 2 ); EXPECT_EQ( m.getElement( 0, 2 ), 3 ); EXPECT_EQ( m.getElement( 0, 3 ), 4 ); EXPECT_EQ( m.getElement( 0, 4 ), 5 ); - + EXPECT_EQ( m.getElement( 1, 0 ), 6 ); EXPECT_EQ( m.getElement( 1, 1 ), 7 ); EXPECT_EQ( m.getElement( 1, 2 ), 8 ); EXPECT_EQ( m.getElement( 1, 3 ), 9 ); EXPECT_EQ( m.getElement( 1, 4 ), 10 ); - + EXPECT_EQ( m.getElement( 2, 0 ), 11 ); EXPECT_EQ( m.getElement( 2, 1 ), 12 ); EXPECT_EQ( m.getElement( 2, 2 ), 13 ); EXPECT_EQ( m.getElement( 2, 3 ), 14 ); EXPECT_EQ( m.getElement( 2, 4 ), 15 ); - + EXPECT_EQ( m.getElement( 3, 0 ), 16 ); EXPECT_EQ( m.getElement( 3, 1 ), 17 ); EXPECT_EQ( m.getElement( 3, 2 ), 18 ); EXPECT_EQ( m.getElement( 3, 3 ), 19 ); EXPECT_EQ( m.getElement( 3, 4 ), 20 ); - + EXPECT_EQ( m.getElement( 4, 0 ), 21 ); EXPECT_EQ( m.getElement( 4, 1 ), 22 ); EXPECT_EQ( m.getElement( 4, 2 ), 23 ); EXPECT_EQ( m.getElement( 4, 3 ), 24 ); EXPECT_EQ( m.getElement( 4, 4 ), 25 ); - + EXPECT_EQ( m.getElement( 5, 0 ), 26 ); EXPECT_EQ( m.getElement( 5, 1 ), 27 ); EXPECT_EQ( m.getElement( 5, 2 ), 28 ); EXPECT_EQ( m.getElement( 5, 3 ), 29 ); EXPECT_EQ( m.getElement( 5, 4 ), 30 ); - + // Add new 
elements to the old elements with a multiplying factor applied to the old elements. /* * The following setup results in the following 6x5 dense matrix: @@ -466,38 +510,38 @@ void test_AddElement() RealType multiplicator = 2; for( IndexType i = 0; i < rows; i++ ) for( IndexType j = 0; j < cols; j++ ) - m.addElement( i, j, newValue++, multiplicator ); - + m.addElement( i, j, newValue++, multiplicator ); + EXPECT_EQ( m.getElement( 0, 0 ), 3 ); EXPECT_EQ( m.getElement( 0, 1 ), 6 ); EXPECT_EQ( m.getElement( 0, 2 ), 9 ); EXPECT_EQ( m.getElement( 0, 3 ), 12 ); EXPECT_EQ( m.getElement( 0, 4 ), 15 ); - + EXPECT_EQ( m.getElement( 1, 0 ), 18 ); EXPECT_EQ( m.getElement( 1, 1 ), 21 ); EXPECT_EQ( m.getElement( 1, 2 ), 24 ); EXPECT_EQ( m.getElement( 1, 3 ), 27 ); EXPECT_EQ( m.getElement( 1, 4 ), 30 ); - + EXPECT_EQ( m.getElement( 2, 0 ), 33 ); EXPECT_EQ( m.getElement( 2, 1 ), 36 ); EXPECT_EQ( m.getElement( 2, 2 ), 39 ); EXPECT_EQ( m.getElement( 2, 3 ), 42 ); EXPECT_EQ( m.getElement( 2, 4 ), 45 ); - + EXPECT_EQ( m.getElement( 3, 0 ), 48 ); EXPECT_EQ( m.getElement( 3, 1 ), 51 ); EXPECT_EQ( m.getElement( 3, 2 ), 54 ); EXPECT_EQ( m.getElement( 3, 3 ), 57 ); EXPECT_EQ( m.getElement( 3, 4 ), 60 ); - + EXPECT_EQ( m.getElement( 4, 0 ), 63 ); EXPECT_EQ( m.getElement( 4, 1 ), 66 ); EXPECT_EQ( m.getElement( 4, 2 ), 69 ); EXPECT_EQ( m.getElement( 4, 3 ), 72 ); EXPECT_EQ( m.getElement( 4, 4 ), 75 ); - + EXPECT_EQ( m.getElement( 5, 0 ), 78 ); EXPECT_EQ( m.getElement( 5, 1 ), 81 ); EXPECT_EQ( m.getElement( 5, 2 ), 84 ); @@ -508,189 +552,195 @@ void test_AddElement() template< typename Matrix > void test_SetRow() { - using RealType = typename Matrix::RealType; - using DeviceType = typename Matrix::DeviceType; - using IndexType = typename Matrix::IndexType; -/* - * Sets up the following 3x7 dense matrix: - * - * / 1 2 3 4 5 6 7 \ - * | 8 9 10 11 12 13 14 | - * \ 15 16 17 18 19 20 21 / - */ - const IndexType rows = 3; - const IndexType cols = 7; - - Matrix m; - m.reset(); - m.setDimensions( rows, cols ); - - RealType value = 1; - for( IndexType i = 0; i < rows; i++ ) - for( IndexType j = 0; j < cols; j++ ) - m.setElement( i, j, value++ ); - - RealType row1 [ 5 ] = { 11, 11, 11, 11, 11 }; IndexType colIndexes1 [ 5 ] = { 0, 1, 2, 3, 4 }; - RealType row2 [ 5 ] = { 22, 22, 22, 22, 22 }; IndexType colIndexes2 [ 5 ] = { 0, 1, 2, 3, 4 }; - RealType row3 [ 5 ] = { 33, 33, 33, 33, 33 }; IndexType colIndexes3 [ 5 ] = { 2, 3, 4, 5, 6 }; - - IndexType row = 0; - IndexType elements = 5; - - m.setRow( row++, colIndexes1, row1, elements ); - m.setRow( row++, colIndexes2, row2, elements ); - m.setRow( row++, colIndexes3, row3, elements ); - - EXPECT_EQ( m.getElement( 0, 0 ), 11 ); - EXPECT_EQ( m.getElement( 0, 1 ), 11 ); - EXPECT_EQ( m.getElement( 0, 2 ), 11 ); - EXPECT_EQ( m.getElement( 0, 3 ), 11 ); - EXPECT_EQ( m.getElement( 0, 4 ), 11 ); - EXPECT_EQ( m.getElement( 0, 5 ), 6 ); - EXPECT_EQ( m.getElement( 0, 6 ), 7 ); - - EXPECT_EQ( m.getElement( 1, 0 ), 22 ); - EXPECT_EQ( m.getElement( 1, 1 ), 22 ); - EXPECT_EQ( m.getElement( 1, 2 ), 22 ); - EXPECT_EQ( m.getElement( 1, 3 ), 22 ); - EXPECT_EQ( m.getElement( 1, 4 ), 22 ); - EXPECT_EQ( m.getElement( 1, 5 ), 13 ); - EXPECT_EQ( m.getElement( 1, 6 ), 14 ); - - EXPECT_EQ( m.getElement( 2, 0 ), 15 ); - EXPECT_EQ( m.getElement( 2, 1 ), 16 ); - EXPECT_EQ( m.getElement( 2, 2 ), 33 ); - EXPECT_EQ( m.getElement( 2, 3 ), 33 ); - EXPECT_EQ( m.getElement( 2, 4 ), 33 ); - EXPECT_EQ( m.getElement( 2, 5 ), 33 ); - EXPECT_EQ( m.getElement( 2, 6 ), 33 ); + using RealType = typename Matrix::RealType; + 
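   // This test now exercises the row-view API that replaces the old
   // setRow( row, columnIndexes, values, elements ) call.  The general pattern
   // (a minimal sketch; rowIdx, columnIdx and value are placeholders, not part
   // of the original test) is:
   //
   //    auto view = m.getView();
   //    auto f = [=] __cuda_callable__ ( IndexType rowIdx ) mutable {
   //       auto row = view.getRow( rowIdx );
   //       row.setElement( columnIdx, value );
   //    };
   //    TNL::Algorithms::ParallelFor< DeviceType >::exec( 0, m.getRows(), f );
   //
   // ParallelFor dispatches the lambda to the host or to the GPU according to
   // DeviceType, so the same test body covers both devices.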
using DeviceType = typename Matrix::DeviceType; + using IndexType = typename Matrix::IndexType; + + /* + * Sets up the following 3x7 dense matrix: + * + * / 11 11 11 11 11 6 7 \ + * | 22 22 22 22 22 13 14 | + * \ 15 16 33 33 33 33 33 / + */ + const IndexType rows = 3; + const IndexType cols = 7; + + Matrix m; + m.reset(); + m.setDimensions( rows, cols ); + + RealType value = 1; + for( IndexType i = 0; i < rows; i++ ) + for( IndexType j = 0; j < cols; j++ ) + m.setElement( i, j, value++ ); + + auto matrix_view = m.getView(); + auto f = [=] __cuda_callable__ ( IndexType rowIdx ) mutable { + RealType values[ 3 ][ 5 ] { + { 11, 11, 11, 11, 11 }, + { 22, 22, 22, 22, 22 }, + { 33, 33, 33, 33, 33 } }; + IndexType columnIndexes[ 3 ][ 5 ] { + { 0, 1, 2, 3, 4 }, + { 0, 1, 2, 3, 4 }, + { 2, 3, 4, 5, 6 } }; + auto row = matrix_view.getRow( rowIdx ); + for( IndexType i = 0; i < 5; i++ ) + row.setElement( columnIndexes[ rowIdx ][ i ], values[ rowIdx ][ i ] ); + }; + TNL::Algorithms::ParallelFor< DeviceType >::exec( 0, 3, f ); + + EXPECT_EQ( m.getElement( 0, 0 ), 11 ); + EXPECT_EQ( m.getElement( 0, 1 ), 11 ); + EXPECT_EQ( m.getElement( 0, 2 ), 11 ); + EXPECT_EQ( m.getElement( 0, 3 ), 11 ); + EXPECT_EQ( m.getElement( 0, 4 ), 11 ); + EXPECT_EQ( m.getElement( 0, 5 ), 6 ); + EXPECT_EQ( m.getElement( 0, 6 ), 7 ); + + EXPECT_EQ( m.getElement( 1, 0 ), 22 ); + EXPECT_EQ( m.getElement( 1, 1 ), 22 ); + EXPECT_EQ( m.getElement( 1, 2 ), 22 ); + EXPECT_EQ( m.getElement( 1, 3 ), 22 ); + EXPECT_EQ( m.getElement( 1, 4 ), 22 ); + EXPECT_EQ( m.getElement( 1, 5 ), 13 ); + EXPECT_EQ( m.getElement( 1, 6 ), 14 ); + + EXPECT_EQ( m.getElement( 2, 0 ), 15 ); + EXPECT_EQ( m.getElement( 2, 1 ), 16 ); + EXPECT_EQ( m.getElement( 2, 2 ), 33 ); + EXPECT_EQ( m.getElement( 2, 3 ), 33 ); + EXPECT_EQ( m.getElement( 2, 4 ), 33 ); + EXPECT_EQ( m.getElement( 2, 5 ), 33 ); + EXPECT_EQ( m.getElement( 2, 6 ), 33 ); } template< typename Matrix > void test_AddRow() { - using RealType = typename Matrix::RealType; - using DeviceType = typename Matrix::DeviceType; - using IndexType = typename Matrix::IndexType; -/* - * Sets up the following 6x5 dense matrix: - * - * / 1 2 3 4 5 \ - * | 6 7 8 9 10 | - * | 11 12 13 14 15 | - * | 16 17 18 19 20 | - * | 21 22 23 24 25 | - * \ 26 27 28 29 30 / - */ - const IndexType rows = 6; - const IndexType cols = 5; - - Matrix m; - m.reset(); - m.setDimensions( rows, cols ); - - RealType value = 1; - for( IndexType i = 0; i < rows; i++ ) - for( IndexType j = 0; j < cols; j++ ) - m.setElement( i, j, value++ ); - - // Check the added elements - EXPECT_EQ( m.getElement( 0, 0 ), 1 ); - EXPECT_EQ( m.getElement( 0, 1 ), 2 ); - EXPECT_EQ( m.getElement( 0, 2 ), 3 ); - EXPECT_EQ( m.getElement( 0, 3 ), 4 ); - EXPECT_EQ( m.getElement( 0, 4 ), 5 ); - - EXPECT_EQ( m.getElement( 1, 0 ), 6 ); - EXPECT_EQ( m.getElement( 1, 1 ), 7 ); - EXPECT_EQ( m.getElement( 1, 2 ), 8 ); - EXPECT_EQ( m.getElement( 1, 3 ), 9 ); - EXPECT_EQ( m.getElement( 1, 4 ), 10 ); - - EXPECT_EQ( m.getElement( 2, 0 ), 11 ); - EXPECT_EQ( m.getElement( 2, 1 ), 12 ); - EXPECT_EQ( m.getElement( 2, 2 ), 13 ); - EXPECT_EQ( m.getElement( 2, 3 ), 14 ); - EXPECT_EQ( m.getElement( 2, 4 ), 15 ); - - EXPECT_EQ( m.getElement( 3, 0 ), 16 ); - EXPECT_EQ( m.getElement( 3, 1 ), 17 ); - EXPECT_EQ( m.getElement( 3, 2 ), 18 ); - EXPECT_EQ( m.getElement( 3, 3 ), 19 ); - EXPECT_EQ( m.getElement( 3, 4 ), 20 ); - - EXPECT_EQ( m.getElement( 4, 0 ), 21 ); - EXPECT_EQ( m.getElement( 4, 1 ), 22 ); - EXPECT_EQ( m.getElement( 4, 2 ), 23 ); - EXPECT_EQ( m.getElement( 4, 3 ), 24 ); - EXPECT_EQ( 
m.getElement( 4, 4 ), 25 ); - - EXPECT_EQ( m.getElement( 5, 0 ), 26 ); - EXPECT_EQ( m.getElement( 5, 1 ), 27 ); - EXPECT_EQ( m.getElement( 5, 2 ), 28 ); - EXPECT_EQ( m.getElement( 5, 3 ), 29 ); - EXPECT_EQ( m.getElement( 5, 4 ), 30 ); - - // Add new elements to the old elements with a multiplying factor applied to the old elements. -/* - * The following setup results in the following 6x5 sparse matrix: - * - * / 3 6 9 12 15 \ - * | 18 21 24 27 30 | - * | 33 36 39 42 45 | - * | 48 51 54 57 60 | - * | 63 66 69 72 75 | - * \ 78 81 84 87 90 / - */ - - RealType row0 [ 5 ] = { 11, 11, 11, 11, 0 }; IndexType colIndexes0 [ 5 ] = { 0, 1, 2, 3, 4 }; - RealType row1 [ 5 ] = { 22, 22, 22, 22, 0 }; IndexType colIndexes1 [ 5 ] = { 0, 1, 2, 3, 4 }; - RealType row2 [ 5 ] = { 33, 33, 33, 33, 0 }; IndexType colIndexes2 [ 5 ] = { 0, 1, 2, 3, 4 }; - RealType row3 [ 5 ] = { 44, 44, 44, 44, 0 }; IndexType colIndexes3 [ 5 ] = { 0, 1, 2, 3, 4 }; - RealType row4 [ 5 ] = { 55, 55, 55, 55, 0 }; IndexType colIndexes4 [ 5 ] = { 0, 1, 2, 3, 4 }; - RealType row5 [ 5 ] = { 66, 66, 66, 66, 0 }; IndexType colIndexes5 [ 5 ] = { 0, 1, 2, 3, 4 }; - - IndexType row = 0; - IndexType elements = 5; - RealType thisRowMultiplicator = 0; - - m.addRow( row++, colIndexes0, row0, elements, thisRowMultiplicator++ ); - m.addRow( row++, colIndexes1, row1, elements, thisRowMultiplicator++ ); - m.addRow( row++, colIndexes2, row2, elements, thisRowMultiplicator++ ); - m.addRow( row++, colIndexes3, row3, elements, thisRowMultiplicator++ ); - m.addRow( row++, colIndexes4, row4, elements, thisRowMultiplicator++ ); - m.addRow( row++, colIndexes5, row5, elements, thisRowMultiplicator++ ); - + using RealType = typename Matrix::RealType; + using DeviceType = typename Matrix::DeviceType; + using IndexType = typename Matrix::IndexType; + /* + * Sets up the following 6x5 dense matrix: + * + * / 1 2 3 4 5 \ + * | 6 7 8 9 10 | + * | 11 12 13 14 15 | + * | 16 17 18 19 20 | + * | 21 22 23 24 25 | + * \ 26 27 28 29 30 / + */ + + const IndexType rows = 6; + const IndexType cols = 5; + + Matrix m( rows, cols ); + + RealType value = 1; + for( IndexType i = 0; i < rows; i++ ) + for( IndexType j = 0; j < cols; j++ ) + m.setElement( i, j, value++ ); + + // Check the added elements + EXPECT_EQ( m.getElement( 0, 0 ), 1 ); + EXPECT_EQ( m.getElement( 0, 1 ), 2 ); + EXPECT_EQ( m.getElement( 0, 2 ), 3 ); + EXPECT_EQ( m.getElement( 0, 3 ), 4 ); + EXPECT_EQ( m.getElement( 0, 4 ), 5 ); + + EXPECT_EQ( m.getElement( 1, 0 ), 6 ); + EXPECT_EQ( m.getElement( 1, 1 ), 7 ); + EXPECT_EQ( m.getElement( 1, 2 ), 8 ); + EXPECT_EQ( m.getElement( 1, 3 ), 9 ); + EXPECT_EQ( m.getElement( 1, 4 ), 10 ); + + EXPECT_EQ( m.getElement( 2, 0 ), 11 ); + EXPECT_EQ( m.getElement( 2, 1 ), 12 ); + EXPECT_EQ( m.getElement( 2, 2 ), 13 ); + EXPECT_EQ( m.getElement( 2, 3 ), 14 ); + EXPECT_EQ( m.getElement( 2, 4 ), 15 ); + + EXPECT_EQ( m.getElement( 3, 0 ), 16 ); + EXPECT_EQ( m.getElement( 3, 1 ), 17 ); + EXPECT_EQ( m.getElement( 3, 2 ), 18 ); + EXPECT_EQ( m.getElement( 3, 3 ), 19 ); + EXPECT_EQ( m.getElement( 3, 4 ), 20 ); + + EXPECT_EQ( m.getElement( 4, 0 ), 21 ); + EXPECT_EQ( m.getElement( 4, 1 ), 22 ); + EXPECT_EQ( m.getElement( 4, 2 ), 23 ); + EXPECT_EQ( m.getElement( 4, 3 ), 24 ); + EXPECT_EQ( m.getElement( 4, 4 ), 25 ); + + EXPECT_EQ( m.getElement( 5, 0 ), 26 ); + EXPECT_EQ( m.getElement( 5, 1 ), 27 ); + EXPECT_EQ( m.getElement( 5, 2 ), 28 ); + EXPECT_EQ( m.getElement( 5, 3 ), 29 ); + EXPECT_EQ( m.getElement( 5, 4 ), 30 ); + + // Add new elements to the old elements with a multiplying factor 
applied to the old elements. + /* + * The following setup results in the following 6x5 sparse matrix: + * + * / 3 6 9 12 15 \ + * | 18 21 24 27 30 | + * | 33 36 39 42 45 | + * | 48 51 54 57 60 | + * | 63 66 69 72 75 | + * \ 78 81 84 87 90 / + */ + + auto matrix_view = m.getView(); + auto f = [=] __cuda_callable__ ( IndexType rowIdx ) mutable { + RealType values[ 6 ][ 5 ] { + { 11, 11, 11, 11, 0 }, + { 22, 22, 22, 22, 0 }, + { 33, 33, 33, 33, 0 }, + { 44, 44, 44, 44, 0 }, + { 55, 55, 55, 55, 0 }, + { 66, 66, 66, 66, 0 } }; + auto row = matrix_view.getRow( rowIdx ); + for( IndexType i = 0; i < 5; i++ ) + { + RealType& val = row.getValue( i ); + val = rowIdx * val + values[ rowIdx ][ i ]; + } + }; + TNL::Algorithms::ParallelFor< DeviceType >::exec( 0, 6, f ); + + EXPECT_EQ( m.getElement( 0, 0 ), 11 ); EXPECT_EQ( m.getElement( 0, 1 ), 11 ); EXPECT_EQ( m.getElement( 0, 2 ), 11 ); EXPECT_EQ( m.getElement( 0, 3 ), 11 ); EXPECT_EQ( m.getElement( 0, 4 ), 0 ); - + EXPECT_EQ( m.getElement( 1, 0 ), 28 ); EXPECT_EQ( m.getElement( 1, 1 ), 29 ); EXPECT_EQ( m.getElement( 1, 2 ), 30 ); EXPECT_EQ( m.getElement( 1, 3 ), 31 ); EXPECT_EQ( m.getElement( 1, 4 ), 10 ); - + EXPECT_EQ( m.getElement( 2, 0 ), 55 ); EXPECT_EQ( m.getElement( 2, 1 ), 57 ); EXPECT_EQ( m.getElement( 2, 2 ), 59 ); EXPECT_EQ( m.getElement( 2, 3 ), 61 ); EXPECT_EQ( m.getElement( 2, 4 ), 30 ); - + EXPECT_EQ( m.getElement( 3, 0 ), 92 ); EXPECT_EQ( m.getElement( 3, 1 ), 95 ); EXPECT_EQ( m.getElement( 3, 2 ), 98 ); EXPECT_EQ( m.getElement( 3, 3 ), 101 ); EXPECT_EQ( m.getElement( 3, 4 ), 60 ); - + EXPECT_EQ( m.getElement( 4, 0 ), 139 ); EXPECT_EQ( m.getElement( 4, 1 ), 143 ); EXPECT_EQ( m.getElement( 4, 2 ), 147 ); EXPECT_EQ( m.getElement( 4, 3 ), 151 ); EXPECT_EQ( m.getElement( 4, 4 ), 100 ); - + EXPECT_EQ( m.getElement( 5, 0 ), 196 ); EXPECT_EQ( m.getElement( 5, 1 ), 201 ); EXPECT_EQ( m.getElement( 5, 2 ), 206 ); @@ -715,31 +765,31 @@ void test_VectorProduct() */ const IndexType rows = 5; const IndexType cols = 4; - + Matrix m; m.reset(); m.setDimensions( rows, cols ); - + RealType value = 1; for( IndexType i = 0; i < rows; i++ ) for( IndexType j = 0; j < cols; j++) m.setElement( i, j, value++ ); using VectorType = TNL::Containers::Vector< RealType, DeviceType, IndexType >; - + VectorType inVector; inVector.setSize( 4 ); - for( IndexType i = 0; i < inVector.getSize(); i++ ) + for( IndexType i = 0; i < inVector.getSize(); i++ ) inVector.setElement( i, 2 ); - VectorType outVector; + VectorType outVector; outVector.setSize( 5 ); for( IndexType j = 0; j < outVector.getSize(); j++ ) outVector.setElement( j, 0 ); - - + + m.vectorProduct( inVector, outVector); - + EXPECT_EQ( outVector.getElement( 0 ), 20 ); EXPECT_EQ( outVector.getElement( 1 ), 52 ); EXPECT_EQ( outVector.getElement( 2 ), 84 ); @@ -764,16 +814,16 @@ void test_AddMatrix() */ const IndexType rows = 5; const IndexType cols = 4; - + Matrix m; m.reset(); m.setDimensions( rows, cols ); - + RealType value = 1; for( IndexType i = 0; i < rows; i++ ) for( IndexType j = 0; j < cols; j++) m.setElement( i, j, value++ ); - + /* * Sets up the following 5x4 dense matrix: * @@ -783,16 +833,16 @@ void test_AddMatrix() * | 13 14 15 16 | * \ 17 18 19 20 / */ - + Matrix m2; m2.reset(); m2.setDimensions( rows, cols ); - + RealType newValue = 1; for( IndexType i = 0; i < rows; i++ ) for( IndexType j = 0; j < cols; j++) m2.setElement( i, j, newValue++ ); - + /* * Sets up the following 5x4 dense matrix: * @@ -802,63 +852,63 @@ void test_AddMatrix() * | 13 14 15 16 | * \ 17 18 19 20 / */ - + Matrix mResult; 
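   // mResult is initialized as a copy of m below; addMatrix( m2, 2, 1 ) then
   // computes mResult = 2 * m2 + 1 * mResult element by element, e.g. element
   // ( 0, 0 ) becomes 2 * 1 + 1 * 1 = 3, which is what both groups of
   // expectations verify.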
mResult.reset(); mResult.setDimensions( rows, cols ); - + mResult = m; - + RealType matrixMultiplicator = 2; RealType thisMatrixMultiplicator = 1; - + mResult.addMatrix( m2, matrixMultiplicator, thisMatrixMultiplicator ); - + EXPECT_EQ( mResult.getElement( 0, 0 ), matrixMultiplicator * m2.getElement( 0, 0 ) + thisMatrixMultiplicator * m.getElement( 0, 0 ) ); EXPECT_EQ( mResult.getElement( 0, 1 ), matrixMultiplicator * m2.getElement( 0, 1 ) + thisMatrixMultiplicator * m.getElement( 0, 1 ) ); EXPECT_EQ( mResult.getElement( 0, 2 ), matrixMultiplicator * m2.getElement( 0, 2 ) + thisMatrixMultiplicator * m.getElement( 0, 2 ) ); EXPECT_EQ( mResult.getElement( 0, 3 ), matrixMultiplicator * m2.getElement( 0, 3 ) + thisMatrixMultiplicator * m.getElement( 0, 3 ) ); - + EXPECT_EQ( mResult.getElement( 1, 0 ), matrixMultiplicator * m2.getElement( 1, 0 ) + thisMatrixMultiplicator * m.getElement( 1, 0 ) ); EXPECT_EQ( mResult.getElement( 1, 1 ), matrixMultiplicator * m2.getElement( 1, 1 ) + thisMatrixMultiplicator * m.getElement( 1, 1 ) ); EXPECT_EQ( mResult.getElement( 1, 2 ), matrixMultiplicator * m2.getElement( 1, 2 ) + thisMatrixMultiplicator * m.getElement( 1, 2 ) ); EXPECT_EQ( mResult.getElement( 1, 3 ), matrixMultiplicator * m2.getElement( 1, 3 ) + thisMatrixMultiplicator * m.getElement( 1, 3 ) ); - + EXPECT_EQ( mResult.getElement( 2, 0 ), matrixMultiplicator * m2.getElement( 2, 0 ) + thisMatrixMultiplicator * m.getElement( 2, 0 ) ); EXPECT_EQ( mResult.getElement( 2, 1 ), matrixMultiplicator * m2.getElement( 2, 1 ) + thisMatrixMultiplicator * m.getElement( 2, 1 ) ); EXPECT_EQ( mResult.getElement( 2, 2 ), matrixMultiplicator * m2.getElement( 2, 2 ) + thisMatrixMultiplicator * m.getElement( 2, 2 ) ); EXPECT_EQ( mResult.getElement( 2, 3 ), matrixMultiplicator * m2.getElement( 2, 3 ) + thisMatrixMultiplicator * m.getElement( 2, 3 ) ); - + EXPECT_EQ( mResult.getElement( 3, 0 ), matrixMultiplicator * m2.getElement( 3, 0 ) + thisMatrixMultiplicator * m.getElement( 3, 0 ) ); EXPECT_EQ( mResult.getElement( 3, 1 ), matrixMultiplicator * m2.getElement( 3, 1 ) + thisMatrixMultiplicator * m.getElement( 3, 1 ) ); EXPECT_EQ( mResult.getElement( 3, 2 ), matrixMultiplicator * m2.getElement( 3, 2 ) + thisMatrixMultiplicator * m.getElement( 3, 2 ) ); EXPECT_EQ( mResult.getElement( 3, 3 ), matrixMultiplicator * m2.getElement( 3, 3 ) + thisMatrixMultiplicator * m.getElement( 3, 3 ) ); - + EXPECT_EQ( mResult.getElement( 4, 0 ), matrixMultiplicator * m2.getElement( 4, 0 ) + thisMatrixMultiplicator * m.getElement( 4, 0 ) ); EXPECT_EQ( mResult.getElement( 4, 1 ), matrixMultiplicator * m2.getElement( 4, 1 ) + thisMatrixMultiplicator * m.getElement( 4, 1 ) ); EXPECT_EQ( mResult.getElement( 4, 2 ), matrixMultiplicator * m2.getElement( 4, 2 ) + thisMatrixMultiplicator * m.getElement( 4, 2 ) ); EXPECT_EQ( mResult.getElement( 4, 3 ), matrixMultiplicator * m2.getElement( 4, 3 ) + thisMatrixMultiplicator * m.getElement( 4, 3 ) ); - + EXPECT_EQ( mResult.getElement( 0, 0 ), 3 ); EXPECT_EQ( mResult.getElement( 0, 1 ), 6 ); EXPECT_EQ( mResult.getElement( 0, 2 ), 9 ); EXPECT_EQ( mResult.getElement( 0, 3 ), 12 ); - + EXPECT_EQ( mResult.getElement( 1, 0 ), 15 ); EXPECT_EQ( mResult.getElement( 1, 1 ), 18 ); EXPECT_EQ( mResult.getElement( 1, 2 ), 21 ); EXPECT_EQ( mResult.getElement( 1, 3 ), 24 ); - + EXPECT_EQ( mResult.getElement( 2, 0 ), 27 ); EXPECT_EQ( mResult.getElement( 2, 1 ), 30 ); EXPECT_EQ( mResult.getElement( 2, 2 ), 33 ); EXPECT_EQ( mResult.getElement( 2, 3 ), 36 ); - + EXPECT_EQ( mResult.getElement( 3, 0 ), 39 ); EXPECT_EQ( 
mResult.getElement( 3, 1 ), 42 ); EXPECT_EQ( mResult.getElement( 3, 2 ), 45 ); EXPECT_EQ( mResult.getElement( 3, 3 ), 48 ); - + EXPECT_EQ( mResult.getElement( 4, 0 ), 51 ); EXPECT_EQ( mResult.getElement( 4, 1 ), 54 ); EXPECT_EQ( mResult.getElement( 4, 2 ), 57 ); @@ -882,16 +932,16 @@ void test_GetMatrixProduct() */ const IndexType leftRows = 5; const IndexType leftCols = 4; - + Matrix leftMatrix; leftMatrix.reset(); leftMatrix.setDimensions( leftRows, leftCols ); - + RealType value = 1; for( IndexType i = 0; i < leftRows; i++ ) for( IndexType j = 0; j < leftCols; j++) leftMatrix.setElement( i, j, value++ ); - + /* * Sets up the following 4x5 dense matrix: * @@ -902,16 +952,16 @@ void test_GetMatrixProduct() */ const IndexType rightRows = 4; const IndexType rightCols = 5; - + Matrix rightMatrix; rightMatrix.reset(); rightMatrix.setDimensions( rightRows, rightCols ); - + RealType newValue = 1; for( IndexType i = 0; i < rightRows; i++ ) for( IndexType j = 0; j < rightCols; j++) rightMatrix.setElement( i, j, newValue++ ); - + /* * Sets up the following 5x5 resulting dense matrix: * @@ -921,48 +971,48 @@ void test_GetMatrixProduct() * | 0 0 0 0 | * \ 0 0 0 0 / */ - + Matrix mResult; mResult.reset(); mResult.setDimensions( leftRows, rightCols ); mResult.setValue( 0 ); - + RealType leftMatrixMultiplicator = 1; RealType rightMatrixMultiplicator = 2; -/* +/* * / 1 2 3 4 \ / 220 240 260 280 300 \ * | 5 6 7 8 | / 1 2 3 4 5 \ | 492 544 596 648 700 | * 1 * | 9 10 11 12 | * 2 * | 6 7 8 9 10 | = | 764 848 932 1016 1100 | * | 13 14 15 16 | | 11 12 13 14 15 | | 1036 1152 1268 1384 1500 | * \ 17 18 19 20 / \ 16 17 18 19 20 / \ 1308 1456 1604 1752 1900 / */ - + mResult.getMatrixProduct( leftMatrix, rightMatrix, leftMatrixMultiplicator, rightMatrixMultiplicator ); - + EXPECT_EQ( mResult.getElement( 0, 0 ), 220 ); EXPECT_EQ( mResult.getElement( 0, 1 ), 240 ); EXPECT_EQ( mResult.getElement( 0, 2 ), 260 ); EXPECT_EQ( mResult.getElement( 0, 3 ), 280 ); EXPECT_EQ( mResult.getElement( 0, 4 ), 300 ); - + EXPECT_EQ( mResult.getElement( 1, 0 ), 492 ); EXPECT_EQ( mResult.getElement( 1, 1 ), 544 ); EXPECT_EQ( mResult.getElement( 1, 2 ), 596 ); EXPECT_EQ( mResult.getElement( 1, 3 ), 648 ); EXPECT_EQ( mResult.getElement( 1, 4 ), 700 ); - + EXPECT_EQ( mResult.getElement( 2, 0 ), 764 ); EXPECT_EQ( mResult.getElement( 2, 1 ), 848 ); EXPECT_EQ( mResult.getElement( 2, 2 ), 932 ); EXPECT_EQ( mResult.getElement( 2, 3 ), 1016 ); EXPECT_EQ( mResult.getElement( 2, 4 ), 1100 ); - + EXPECT_EQ( mResult.getElement( 3, 0 ), 1036 ); EXPECT_EQ( mResult.getElement( 3, 1 ), 1152 ); EXPECT_EQ( mResult.getElement( 3, 2 ), 1268 ); EXPECT_EQ( mResult.getElement( 3, 3 ), 1384 ); EXPECT_EQ( mResult.getElement( 3, 4 ), 1500 ); - + EXPECT_EQ( mResult.getElement( 4, 0 ), 1308 ); EXPECT_EQ( mResult.getElement( 4, 1 ), 1456 ); EXPECT_EQ( mResult.getElement( 4, 2 ), 1604 ); @@ -996,36 +1046,36 @@ void test_GetTransposition() m.setElement( i, j, value++ ); m.print( std::cout ); - + /* * Sets up the following 2x3 dense matrix: * * / 0 0 0 \ * \ 0 0 0 / - */ + */ Matrix mTransposed; mTransposed.reset(); mTransposed.setDimensions( cols, rows ); - + mTransposed.print( std::cout ); - + RealType matrixMultiplicator = 1; - + mTransposed.getTransposition( m, matrixMultiplicator ); - + mTransposed.print( std::cout ); - + /* * Should result in the following 2x3 dense matrix: * * / 1 3 5 \ * \ 2 4 6 / - */ - + */ + EXPECT_EQ( mTransposed.getElement( 0, 0 ), 1 ); EXPECT_EQ( mTransposed.getElement( 0, 1 ), 3 ); EXPECT_EQ( mTransposed.getElement( 0, 2 ), 5 ); - + 
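   // With matrixMultiplicator == 1, mTransposed( i, j ) should simply equal
   // m( j, i ), i.e. the 2x3 matrix sketched in the comment above.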
EXPECT_EQ( mTransposed.getElement( 1, 0 ), 2 ); EXPECT_EQ( mTransposed.getElement( 1, 1 ), 4 ); EXPECT_EQ( mTransposed.getElement( 1, 2 ), 6 ); @@ -1048,66 +1098,114 @@ void test_PerformSORIteration() */ const IndexType rows = 4; const IndexType cols = 4; - + Matrix m; m.reset(); m.setDimensions( rows, cols ); - + m.setElement( 0, 0, 4.0 ); // 0th row m.setElement( 0, 1, 1.0 ); m.setElement( 0, 2, 1.0 ); m.setElement( 0, 3, 1.0 ); - + m.setElement( 1, 0, 1.0 ); // 1st row m.setElement( 1, 1, 4.0 ); m.setElement( 1, 2, 1.0 ); m.setElement( 1, 3, 1.0 ); - + m.setElement( 2, 0, 1.0 ); m.setElement( 2, 1, 1.0 ); // 2nd row m.setElement( 2, 2, 4.0 ); m.setElement( 2, 3, 1.0 ); - + m.setElement( 3, 0, 1.0 ); // 3rd row m.setElement( 3, 1, 1.0 ); m.setElement( 3, 2, 1.0 ); m.setElement( 3, 3, 4.0 ); - + RealType bVector [ 4 ] = { 1.0, 1.0, 1.0, 1.0 }; RealType xVector [ 4 ] = { 1.0, 1.0, 1.0, 1.0 }; - + IndexType row = 0; RealType omega = 1; - + m.performSORIteration( bVector, row++, xVector, omega); - + EXPECT_EQ( xVector[ 0 ], -0.5 ); EXPECT_EQ( xVector[ 1 ], 1.0 ); EXPECT_EQ( xVector[ 2 ], 1.0 ); EXPECT_EQ( xVector[ 3 ], 1.0 ); - + m.performSORIteration( bVector, row++, xVector, omega); - + EXPECT_EQ( xVector[ 0 ], -0.5 ); EXPECT_EQ( xVector[ 1 ], -0.125 ); EXPECT_EQ( xVector[ 2 ], 1.0 ); EXPECT_EQ( xVector[ 3 ], 1.0 ); - + m.performSORIteration( bVector, row++, xVector, omega); - + EXPECT_EQ( xVector[ 0 ], -0.5 ); EXPECT_EQ( xVector[ 1 ], -0.125 ); EXPECT_EQ( xVector[ 2 ], 0.15625 ); EXPECT_EQ( xVector[ 3 ], 1.0 ); - + m.performSORIteration( bVector, row++, xVector, omega); - + EXPECT_EQ( xVector[ 0 ], -0.5 ); EXPECT_EQ( xVector[ 1 ], -0.125 ); EXPECT_EQ( xVector[ 2 ], 0.15625 ); EXPECT_EQ( xVector[ 3 ], 0.3671875 ); } +template< typename Matrix > +void test_AssignmentOperator() +{ + using RealType = typename Matrix::RealType; + using DeviceType = typename Matrix::DeviceType; + using IndexType = typename Matrix::IndexType; + + using DenseHost = TNL::Matrices::Dense< RealType, TNL::Devices::Host, IndexType >; + using DenseCuda = TNL::Matrices::Dense< RealType, TNL::Devices::Cuda, IndexType >; + + const IndexType rows( 10 ), columns( 10 ); + DenseHost hostMatrix( rows, columns ); + for( IndexType i = 0; i < columns; i++ ) + for( IndexType j = 0; j <= i; j++ ) + hostMatrix( i, j ) = i + j; + + Matrix matrix( rows, columns ); + matrix.getValues() = 0.0; + matrix = hostMatrix; + for( IndexType i = 0; i < columns; i++ ) + for( IndexType j = 0; j < rows; j++ ) + { + if( j > i ) + EXPECT_EQ( matrix.getElement( i, j ), 0.0 ); + else + EXPECT_EQ( matrix.getElement( i, j ), i + j ); + } + +#ifdef HAVE_CUDA + DenseCuda cudaMatrix( rows, columns ); + for( IndexType i = 0; i < columns; i++ ) + for( IndexType j = 0; j <= i; j++ ) + cudaMatrix.setElement( i, j, i + j ); + + matrix.getValues() = 0.0; + matrix = cudaMatrix; + for( IndexType i = 0; i < columns; i++ ) + for( IndexType j = 0; j < rows; j++ ) + { + if( j > i ) + EXPECT_EQ( matrix.getElement( i, j ), 0.0 ); + else + EXPECT_EQ( matrix.getElement( i, j ), i + j ); + } +#endif +} + + template< typename Matrix > void test_SaveAndLoad() { @@ -1124,65 +1222,61 @@ void test_SaveAndLoad() */ const IndexType rows = 4; const IndexType cols = 4; - + Matrix savedMatrix; savedMatrix.reset(); savedMatrix.setDimensions( rows, cols ); - + RealType value = 1; for( IndexType i = 0; i < rows; i++ ) for( IndexType j = 0; j < cols; j++ ) savedMatrix.setElement( i, j, value++ ); - + ASSERT_NO_THROW( savedMatrix.save( TEST_FILE_NAME ) ); - + Matrix loadedMatrix; - 
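   // The explicit reset()/setDimensions() calls that used to precede load()
   // are dropped here: load() itself is expected to restore the dimensions
   // (and values) stored by save(), which the element comparisons below confirm.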
loadedMatrix.reset(); - loadedMatrix.setDimensions( rows, cols ); - + ASSERT_NO_THROW( loadedMatrix.load( TEST_FILE_NAME ) ); - + EXPECT_EQ( savedMatrix.getElement( 0, 0 ), loadedMatrix.getElement( 0, 0 ) ); EXPECT_EQ( savedMatrix.getElement( 0, 1 ), loadedMatrix.getElement( 0, 1 ) ); EXPECT_EQ( savedMatrix.getElement( 0, 2 ), loadedMatrix.getElement( 0, 2 ) ); EXPECT_EQ( savedMatrix.getElement( 0, 3 ), loadedMatrix.getElement( 0, 3 ) ); - + EXPECT_EQ( savedMatrix.getElement( 1, 0 ), loadedMatrix.getElement( 1, 0 ) ); EXPECT_EQ( savedMatrix.getElement( 1, 1 ), loadedMatrix.getElement( 1, 1 ) ); EXPECT_EQ( savedMatrix.getElement( 1, 2 ), loadedMatrix.getElement( 1, 2 ) ); EXPECT_EQ( savedMatrix.getElement( 1, 3 ), loadedMatrix.getElement( 1, 3 ) ); - + EXPECT_EQ( savedMatrix.getElement( 2, 0 ), loadedMatrix.getElement( 2, 0 ) ); EXPECT_EQ( savedMatrix.getElement( 2, 1 ), loadedMatrix.getElement( 2, 1 ) ); EXPECT_EQ( savedMatrix.getElement( 2, 2 ), loadedMatrix.getElement( 2, 2 ) ); EXPECT_EQ( savedMatrix.getElement( 2, 3 ), loadedMatrix.getElement( 2, 3 ) ); - + EXPECT_EQ( savedMatrix.getElement( 3, 0 ), loadedMatrix.getElement( 3, 0 ) ); EXPECT_EQ( savedMatrix.getElement( 3, 1 ), loadedMatrix.getElement( 3, 1 ) ); EXPECT_EQ( savedMatrix.getElement( 3, 2 ), loadedMatrix.getElement( 3, 2 ) ); EXPECT_EQ( savedMatrix.getElement( 3, 3 ), loadedMatrix.getElement( 3, 3 ) ); - + EXPECT_EQ( savedMatrix.getElement( 0, 0 ), 1 ); EXPECT_EQ( savedMatrix.getElement( 0, 1 ), 2 ); EXPECT_EQ( savedMatrix.getElement( 0, 2 ), 3 ); EXPECT_EQ( savedMatrix.getElement( 0, 3 ), 4 ); - + EXPECT_EQ( savedMatrix.getElement( 1, 0 ), 5 ); EXPECT_EQ( savedMatrix.getElement( 1, 1 ), 6 ); EXPECT_EQ( savedMatrix.getElement( 1, 2 ), 7 ); EXPECT_EQ( savedMatrix.getElement( 1, 3 ), 8 ); - + EXPECT_EQ( savedMatrix.getElement( 2, 0 ), 9 ); EXPECT_EQ( savedMatrix.getElement( 2, 1 ), 10 ); EXPECT_EQ( savedMatrix.getElement( 2, 2 ), 11 ); EXPECT_EQ( savedMatrix.getElement( 2, 3 ), 12 ); - + EXPECT_EQ( savedMatrix.getElement( 3, 0 ), 13 ); EXPECT_EQ( savedMatrix.getElement( 3, 1 ), 14 ); EXPECT_EQ( savedMatrix.getElement( 3, 2 ), 15 ); EXPECT_EQ( savedMatrix.getElement( 3, 3 ), 16 ); - - std::cout << "\nThis will create a file called '" << TEST_FILE_NAME << "' (of the matrix created in the test function), in .../tnl-dev/Debug/bin/\n\n"; } template< typename Matrix > @@ -1202,33 +1296,33 @@ void test_Print() */ const IndexType rows = 5; const IndexType cols = 4; - + Matrix m; m.reset(); m.setDimensions( rows, cols ); - + RealType value = 1; for( IndexType i = 0; i < rows; i++) for( IndexType j = 0; j < cols; j++) m.setElement( i, j, value++ ); - + #include <sstream> std::stringstream printed; std::stringstream couted; - + //change the underlying buffer and save the old buffer - auto old_buf = std::cout.rdbuf(printed.rdbuf()); + auto old_buf = std::cout.rdbuf(printed.rdbuf()); m.print( std::cout ); //all the std::cout goes to ss std::cout.rdbuf(old_buf); //reset - + couted << "Row: 0 -> Col:0->1 Col:1->2 Col:2->3 Col:3->4\t\n" "Row: 1 -> Col:0->5 Col:1->6 Col:2->7 Col:3->8\t\n" "Row: 2 -> Col:0->9 Col:1->10 Col:2->11 Col:3->12\t\n" "Row: 3 -> Col:0->13 Col:1->14 Col:2->15 Col:3->16\t\n" "Row: 4 -> Col:0->17 Col:1->18 Col:2->19 Col:3->20\t\n"; - + EXPECT_EQ( printed.str(), couted.str() ); } @@ -1273,108 +1367,120 @@ using MatrixTypes = ::testing::Types TYPED_TEST_SUITE( MatrixTest, MatrixTypes ); +TYPED_TEST( MatrixTest, getSerializationType ) +{ + test_GetSerializationType(); +} + TYPED_TEST( MatrixTest, setDimensionsTest ) { using 
MatrixType = typename TestFixture::MatrixType; - + test_SetDimensions< MatrixType >(); } TYPED_TEST( MatrixTest, setLikeTest ) { using MatrixType = typename TestFixture::MatrixType; - + test_SetLike< MatrixType, MatrixType >(); } TYPED_TEST( MatrixTest, getRowLengthTest ) { using MatrixType = typename TestFixture::MatrixType; - + test_GetRowLength< MatrixType >(); } TYPED_TEST( MatrixTest, getNumberOfMatrixElementsTest ) { using MatrixType = typename TestFixture::MatrixType; - + test_GetNumberOfMatrixElements< MatrixType >(); } TYPED_TEST( MatrixTest, getNumberOfNonzeroMatrixElementsTest ) { using MatrixType = typename TestFixture::MatrixType; - + test_GetNumberOfNonzeroMatrixElements< MatrixType >(); } TYPED_TEST( MatrixTest, resetTest ) { using MatrixType = typename TestFixture::MatrixType; - + test_Reset< MatrixType >(); } TYPED_TEST( MatrixTest, setValueTest ) { using MatrixType = typename TestFixture::MatrixType; - + test_SetValue< MatrixType >(); } TYPED_TEST( MatrixTest, setElementTest ) { using MatrixType = typename TestFixture::MatrixType; - + test_SetElement< MatrixType >(); } TYPED_TEST( MatrixTest, addElementTest ) { using MatrixType = typename TestFixture::MatrixType; - + test_AddElement< MatrixType >(); } TYPED_TEST( MatrixTest, setRowTest ) { using MatrixType = typename TestFixture::MatrixType; - + test_SetRow< MatrixType >(); } TYPED_TEST( MatrixTest, addRowTest ) { using MatrixType = typename TestFixture::MatrixType; - + test_AddRow< MatrixType >(); } TYPED_TEST( MatrixTest, vectorProductTest ) { using MatrixType = typename TestFixture::MatrixType; - + test_VectorProduct< MatrixType >(); } TYPED_TEST( MatrixTest, addMatrixTest ) { using MatrixType = typename TestFixture::MatrixType; - + test_AddMatrix< MatrixType >(); } +TYPED_TEST( MatrixTest, assignmentOperatorTest ) +{ + using MatrixType = typename TestFixture::MatrixType; + + test_AssignmentOperator< MatrixType >(); +} + TYPED_TEST( MatrixTest, saveAndLoadTest ) { using MatrixType = typename TestFixture::MatrixType; - + test_SaveAndLoad< MatrixType >(); } TYPED_TEST( MatrixTest, printTest ) { using MatrixType = typename TestFixture::MatrixType; - + test_Print< MatrixType >(); } @@ -1392,7 +1498,7 @@ TYPED_TEST( MatrixTest, printTest ) //} //#endif -TEST( DenseMatrixTest, Dense_getMatrixProductTest_Host ) +/*TEST( DenseMatrixTest, Dense_getMatrixProductTest_Host ) { bool testRan = false; EXPECT_TRUE( testRan ); @@ -1496,7 +1602,8 @@ TEST( DenseMatrixTest, Dense_performSORIterationTest_Cuda ) std::cout << "\n THIS IS NOT IMPLEMENTED FOR CUDA YET!!\n\n"; } #endif + * */ -#endif +#endif // HAVE_GTEST #include "../main.h" diff --git a/src/UnitTests/Matrices/DistributedMatrixTest.h b/src/UnitTests/Matrices/DistributedMatrixTest.h index 93673a29063db52afcabf59165ea5949471cf1bc..9487e59776b4452f42e894ddc856100edde62b9f 100644 --- a/src/UnitTests/Matrices/DistributedMatrixTest.h +++ b/src/UnitTests/Matrices/DistributedMatrixTest.h @@ -13,7 +13,7 @@ #include <TNL/Communicators/NoDistrCommunicator.h> #include <TNL/Matrices/DistributedMatrix.h> #include <TNL/Containers/Partitioner.h> -#include <TNL/Matrices/CSR.h> +#include <TNL/Matrices/Legacy/CSR.h> using namespace TNL; @@ -171,7 +171,7 @@ TYPED_TEST( DistributedMatrixTest, getCompressedRowLengths ) this->matrix.setCompressedRowLengths( this->rowLengths ); RowLengthsVector output; - this->matrix.getCompressedRowLengths( output ); + this->matrix.getCompressedRowLengths( output ); // TODO: replace this with getRowCapacities EXPECT_EQ( output, this->rowLengths ); } diff --git 
a/src/UnitTests/Matrices/Legacy/CMakeLists.txt b/src/UnitTests/Matrices/Legacy/CMakeLists.txt new file mode 100644 index 0000000000000000000000000000000000000000..46c6be2cdacbb24648f60aa9e6337f49cd59ad8b --- /dev/null +++ b/src/UnitTests/Matrices/Legacy/CMakeLists.txt @@ -0,0 +1,72 @@ +IF( BUILD_CUDA ) + #CUDA_ADD_EXECUTABLE( Legacy_SparseMatrixCopyTest SparseMatrixCopyTest.cu OPTIONS ${CXX_TESTS_FLAGS} ) + #TARGET_LINK_LIBRARIES( Legacy_SparseMatrixCopyTest ${GTEST_BOTH_LIBRARIES} ) + + CUDA_ADD_EXECUTABLE( Legacy_SparseMatrixTest SparseMatrixTest.cu OPTIONS ${CXX_TESTS_FLAGS} ) + TARGET_LINK_LIBRARIES( Legacy_SparseMatrixTest ${GTEST_BOTH_LIBRARIES} ) + + CUDA_ADD_EXECUTABLE( Legacy_SparseMatrixTest_AdEllpack SparseMatrixTest_AdEllpack.cu OPTIONS ${CXX_TESTS_FLAGS} ) + TARGET_LINK_LIBRARIES( Legacy_SparseMatrixTest_AdEllpack ${GTEST_BOTH_LIBRARIES} ) + + CUDA_ADD_EXECUTABLE( Legacy_SparseMatrixTest_BiEllpack SparseMatrixTest_BiEllpack.cu OPTIONS ${CXX_TESTS_FLAGS} ) + TARGET_LINK_LIBRARIES( Legacy_SparseMatrixTest_BiEllpack ${GTEST_BOTH_LIBRARIES} ) + + CUDA_ADD_EXECUTABLE( Legacy_SparseMatrixTest_ChunkedEllpack SparseMatrixTest_ChunkedEllpack.cu OPTIONS ${CXX_TESTS_FLAGS} ) + TARGET_LINK_LIBRARIES( Legacy_SparseMatrixTest_ChunkedEllpack ${GTEST_BOTH_LIBRARIES} ) + + CUDA_ADD_EXECUTABLE( Legacy_SparseMatrixTest_CSR SparseMatrixTest_CSR.cu OPTIONS ${CXX_TESTS_FLAGS} ) + TARGET_LINK_LIBRARIES( Legacy_SparseMatrixTest_CSR ${GTEST_BOTH_LIBRARIES} ) + + CUDA_ADD_EXECUTABLE( Legacy_SparseMatrixTest_Ellpack SparseMatrixTest_Ellpack.cu OPTIONS ${CXX_TESTS_FLAGS} ) + TARGET_LINK_LIBRARIES( Legacy_SparseMatrixTest_Ellpack ${GTEST_BOTH_LIBRARIES} ) + + CUDA_ADD_EXECUTABLE( Legacy_SparseMatrixTest_SlicedEllpack SparseMatrixTest_SlicedEllpack.cu OPTIONS ${CXX_TESTS_FLAGS} ) + TARGET_LINK_LIBRARIES( Legacy_SparseMatrixTest_SlicedEllpack ${GTEST_BOTH_LIBRARIES} ) + +ELSE( BUILD_CUDA ) + #ADD_EXECUTABLE( Legacy_SparseMatrixCopyTest SparseMatrixCopyTest.cpp ) + #TARGET_COMPILE_OPTIONS( Legacy_SparseMatrixCopyTest PRIVATE ${CXX_TESTS_FLAGS} ) + #TARGET_LINK_LIBRARIES( Legacy_SparseMatrixCopyTest ${GTEST_BOTH_LIBRARIES} ) + + ADD_EXECUTABLE( Legacy_SparseMatrixTest SparseMatrixTest.cpp ) + TARGET_COMPILE_OPTIONS( Legacy_SparseMatrixTest PRIVATE ${CXX_TESTS_FLAGS} ) + TARGET_LINK_LIBRARIES( Legacy_SparseMatrixTest ${GTEST_BOTH_LIBRARIES} ) + + ADD_EXECUTABLE( Legacy_SparseMatrixTest_AdEllpack SparseMatrixTest_AdEllpack.cpp ) + TARGET_COMPILE_OPTIONS( Legacy_SparseMatrixTest_AdEllpack PRIVATE ${CXX_TESTS_FLAGS} ) + TARGET_LINK_LIBRARIES( Legacy_SparseMatrixTest_AdEllpack ${GTEST_BOTH_LIBRARIES} ) + + ADD_EXECUTABLE( Legacy_SparseMatrixTest_BiEllpack SparseMatrixTest_BiEllpack.cpp ) + TARGET_COMPILE_OPTIONS( Legacy_SparseMatrixTest_BiEllpack PRIVATE ${CXX_TESTS_FLAGS} ) + TARGET_LINK_LIBRARIES( Legacy_SparseMatrixTest_BiEllpack ${GTEST_BOTH_LIBRARIES} ) + + ADD_EXECUTABLE( Legacy_SparseMatrixTest_ChunkedEllpack SparseMatrixTest_ChunkedEllpack.cpp ) + TARGET_COMPILE_OPTIONS( Legacy_SparseMatrixTest_ChunkedEllpack PRIVATE ${CXX_TESTS_FLAGS} ) + TARGET_LINK_LIBRARIES( Legacy_SparseMatrixTest_ChunkedEllpack ${GTEST_BOTH_LIBRARIES} ) + + ADD_EXECUTABLE( Legacy_SparseMatrixTest_CSR SparseMatrixTest_CSR.cpp ) + TARGET_COMPILE_OPTIONS( Legacy_SparseMatrixTest_CSR PRIVATE ${CXX_TESTS_FLAGS} ) + TARGET_LINK_LIBRARIES( Legacy_SparseMatrixTest_CSR ${GTEST_BOTH_LIBRARIES} ) + + ADD_EXECUTABLE( Legacy_SparseMatrixTest_Ellpack SparseMatrixTest_Ellpack.cpp ) + TARGET_COMPILE_OPTIONS( Legacy_SparseMatrixTest_Ellpack PRIVATE 
${CXX_TESTS_FLAGS} ) + TARGET_LINK_LIBRARIES( Legacy_SparseMatrixTest_Ellpack ${GTEST_BOTH_LIBRARIES} ) + + ADD_EXECUTABLE( Legacy_SparseMatrixTest_SlicedEllpack SparseMatrixTest_SlicedEllpack.cpp ) + TARGET_COMPILE_OPTIONS( Legacy_SparseMatrixTest_SlicedEllpack PRIVATE ${CXX_TESTS_FLAGS} ) + TARGET_LINK_LIBRARIES( Legacy_SparseMatrixTest_SlicedEllpack ${GTEST_BOTH_LIBRARIES} ) + +ENDIF( BUILD_CUDA ) + + +#ADD_TEST( Legacy_SparseMatrixCopyTest ${EXECUTABLE_OUTPUT_PATH}/Legacy_SparseMatrixCopyTest${CMAKE_EXECUTABLE_SUFFIX} ) +ADD_TEST( Legacy_SparseMatrixTest ${EXECUTABLE_OUTPUT_PATH}/Legacy_SparseMatrixTest${CMAKE_EXECUTABLE_SUFFIX} ) +# TODO: Uncomment the following when AdEllpack works +#ADD_TEST( SparseMatrixTest_AdEllpack ${EXECUTABLE_OUTPUT_PATH}/SparseMatrixTest_AdEllpack${CMAKE_EXECUTABLE_SUFFIX} ) +ADD_TEST( Legacy_SparseMatrixTest_BiEllpack ${EXECUTABLE_OUTPUT_PATH}/Legacy_SparseMatrixTest_BiEllpack${CMAKE_EXECUTABLE_SUFFIX} ) +ADD_TEST( Legacy_SparseMatrixTest_ChunkedEllpack ${EXECUTABLE_OUTPUT_PATH}/Legacy_SparseMatrixTest_ChunkedEllpack${CMAKE_EXECUTABLE_SUFFIX} ) +ADD_TEST( Legacy_SparseMatrixTest_CSR ${EXECUTABLE_OUTPUT_PATH}/Legacy_SparseMatrixTest_CSR${CMAKE_EXECUTABLE_SUFFIX} ) +ADD_TEST( Legacy_SparseMatrixTest_Ellpack ${EXECUTABLE_OUTPUT_PATH}/Legacy_SparseMatrixTest_Ellpack${CMAKE_EXECUTABLE_SUFFIX} ) +ADD_TEST( Legacy_SparseMatrixTest_SlicedEllpack ${EXECUTABLE_OUTPUT_PATH}/Legacy_SparseMatrixTest_SlicedEllpack${CMAKE_EXECUTABLE_SUFFIX} ) +# TODO: DenseMatrixTest is not finished +#ADD_TEST( DenseMatrixTest ${EXECUTABLE_OUTPUT_PATH}/DenseMatrixTest${CMAKE_EXECUTABLE_SUFFIX} ) diff --git a/src/UnitTests/Matrices/Legacy/SparseMatrixCopyTest.cpp b/src/UnitTests/Matrices/Legacy/SparseMatrixCopyTest.cpp new file mode 100644 index 0000000000000000000000000000000000000000..30b8f64ecfdbf228856d272a71d3de08980f3987 --- /dev/null +++ b/src/UnitTests/Matrices/Legacy/SparseMatrixCopyTest.cpp @@ -0,0 +1,11 @@ +/*************************************************************************** + SparseMatrixCopyTest.cpp - description + ------------------- + begin : Jun 25, 2017 + copyright : (C) 2017 by Tomas Oberhuber et al. + email : tomas.oberhuber@fjfi.cvut.cz + ***************************************************************************/ + +/* See Copyright Notice in tnl/Copyright */ + +#include "SparseMatrixCopyTest.h" diff --git a/src/UnitTests/Matrices/Legacy/SparseMatrixCopyTest.cu b/src/UnitTests/Matrices/Legacy/SparseMatrixCopyTest.cu new file mode 100644 index 0000000000000000000000000000000000000000..431fe481c2db1d5b18cfa849e882c0ed836463c1 --- /dev/null +++ b/src/UnitTests/Matrices/Legacy/SparseMatrixCopyTest.cu @@ -0,0 +1,11 @@ +/*************************************************************************** + SparseMatrixCopyTest.cu - description + ------------------- + begin : Jun 25, 2017 + copyright : (C) 2017 by Tomas Oberhuber et al. 
+ email : tomas.oberhuber@fjfi.cvut.cz + ***************************************************************************/ + +/* See Copyright Notice in tnl/Copyright */ + +#include "SparseMatrixCopyTest.h" diff --git a/src/UnitTests/Matrices/Legacy/SparseMatrixCopyTest.h b/src/UnitTests/Matrices/Legacy/SparseMatrixCopyTest.h new file mode 100644 index 0000000000000000000000000000000000000000..aece39d9a848e02ea32f5fc29446a6b1dfbf7f37 --- /dev/null +++ b/src/UnitTests/Matrices/Legacy/SparseMatrixCopyTest.h @@ -0,0 +1,573 @@ +/*************************************************************************** + SparseMatrixCopyTest.h - description + ------------------- + begin : Jun 25, 2017 + copyright : (C) 2017 by Tomas Oberhuber et al. + email : tomas.oberhuber@fjfi.cvut.cz + ***************************************************************************/ + +/* See Copyright Notice in tnl/Copyright */ + +#include <TNL/Matrices/Legacy/CSR.h> +#include <TNL/Matrices/Legacy/Ellpack.h> +#include <TNL/Matrices/Legacy/SlicedEllpack.h> + +#include <TNL/Matrices/SparseMatrix.h> +#include <TNL/Matrices/MatrixType.h> +#include <TNL/Containers/Segments/CSR.h> +#include <TNL/Containers/Segments/Ellpack.h> +#include <TNL/Containers/Segments/SlicedEllpack.h> + +using CSR_host = TNL::Matrices::CSR< int, TNL::Devices::Host, int >; +using CSR_cuda = TNL::Matrices::CSR< int, TNL::Devices::Cuda, int >; +using E_host = TNL::Matrices::Ellpack< int, TNL::Devices::Host, int >; +using E_cuda = TNL::Matrices::Ellpack< int, TNL::Devices::Cuda, int >; +using SE_host = TNL::Matrices::SlicedEllpack< int, TNL::Devices::Host, int, 2 >; +using SE_cuda = TNL::Matrices::SlicedEllpack< int, TNL::Devices::Cuda, int, 2 >; + +/*template< typename Device, typename Index, typename IndexAllocator > +using EllpackSegments = TNL::Containers::Segments::Ellpack< Device, Index, IndexAllocator >; + +template< typename Device, typename Index, typename IndexAllocator > +using SlicedEllpackSegments = TNL::Containers::Segments::SlicedEllpack< Device, Index, IndexAllocator >; + +using CSR_host = TNL::Matrices::SparseMatrix< int, TNL::Devices::Host, int, TNL::Matrices::GeneralMatrix, TNL::Containers::Segments::CSR >; +using CSR_cuda = TNL::Matrices::SparseMatrix< int, TNL::Devices::Cuda, int, TNL::Matrices::GeneralMatrix, TNL::Containers::Segments::CSR >; +using E_host = TNL::Matrices::SparseMatrix< int, TNL::Devices::Host, int, TNL::Matrices::GeneralMatrix, EllpackSegments >; +using E_cuda = TNL::Matrices::SparseMatrix< int, TNL::Devices::Cuda, int, TNL::Matrices::GeneralMatrix, EllpackSegments >; +using SE_host = TNL::Matrices::SparseMatrix< int, TNL::Devices::Host, int, TNL::Matrices::GeneralMatrix, SlicedEllpackSegments >; +using SE_cuda = TNL::Matrices::SparseMatrix< int, TNL::Devices::Cuda, int, TNL::Matrices::GeneralMatrix, SlicedEllpackSegments >;*/ + + +#ifdef HAVE_GTEST +#include <gtest/gtest.h> + +/* + * Sets up the following 10x6 sparse matrix: + * + * / 1 2 \ + * | 3 4 5 | + * | 6 7 8 | + * | 9 10 11 12 13 | + * | 14 15 16 17 18 | + * | 19 20 | + * | 21 | + * | 22 | + * | 23 24 25 26 27 | + * \ 28 / + */ +template< typename Matrix > +void setupUnevenRowSizeMatrix( Matrix& m ) +{ + const int rows = 10; + const int cols = 6; + m.reset(); + m.setDimensions( rows, cols ); + typename Matrix::CompressedRowLengthsVector rowLengths; + rowLengths.setSize( rows ); + rowLengths.setValue( 5 ); + rowLengths.setElement( 0, 2 ); + rowLengths.setElement( 1, 3 ); + rowLengths.setElement( 2, 3 ); + rowLengths.setElement( 5, 2 ); + rowLengths.setElement( 6, 1 ); 
+ rowLengths.setElement( 7, 1 ); + rowLengths.setElement( 9, 1 ); + m.setCompressedRowLengths( rowLengths ); + + int value = 1; + for( int i = 0; i < cols - 4; i++ ) // 0th row + m.setElement( 0, i, value++ ); + + for( int i = 3; i < cols; i++ ) // 1st row + m.setElement( 1, i, value++ ); + + for( int i = 0; i < cols - 3; i++ ) // 2nd row + m.setElement( 2, i, value++ ); + + for( int i = 1; i < cols; i++ ) // 3rd row + m.setElement( 3, i, value++ ); + + for( int i = 0; i < cols - 1; i++ ) // 4th row + m.setElement( 4, i, value++ ); + + for( int i = 0; i < cols - 4; i++ ) // 5th row + m.setElement( 5, i, value++ ); + + m.setElement( 6, 0, value++ ); // 6th row + + m.setElement( 7, 0, value++ ); // 7th row + + for( int i = 0; i < cols - 1; i++ ) // 8th row + m.setElement( 8, i, value++ ); + + m.setElement( 9, 5, value++ ); // 9th row +} + +template< typename Matrix > +void checkUnevenRowSizeMatrix( Matrix& m ) +{ + ASSERT_EQ( m.getRows(), 10 ); + ASSERT_EQ( m.getColumns(), 6 ); + + EXPECT_EQ( m.getElement( 0, 0 ), 1 ); + EXPECT_EQ( m.getElement( 0, 1 ), 2 ); + EXPECT_EQ( m.getElement( 0, 2 ), 0 ); + EXPECT_EQ( m.getElement( 0, 3 ), 0 ); + EXPECT_EQ( m.getElement( 0, 4 ), 0 ); + EXPECT_EQ( m.getElement( 0, 5 ), 0); + + EXPECT_EQ( m.getElement( 1, 0 ), 0 ); + EXPECT_EQ( m.getElement( 1, 1 ), 0 ); + EXPECT_EQ( m.getElement( 1, 2 ), 0 ); + EXPECT_EQ( m.getElement( 1, 3 ), 3 ); + EXPECT_EQ( m.getElement( 1, 4 ), 4 ); + EXPECT_EQ( m.getElement( 1, 5 ), 5 ); + + EXPECT_EQ( m.getElement( 2, 0 ), 6 ); + EXPECT_EQ( m.getElement( 2, 1 ), 7 ); + EXPECT_EQ( m.getElement( 2, 2 ), 8 ); + EXPECT_EQ( m.getElement( 2, 3 ), 0 ); + EXPECT_EQ( m.getElement( 2, 4 ), 0 ); + EXPECT_EQ( m.getElement( 2, 5 ), 0 ); + + EXPECT_EQ( m.getElement( 3, 0 ), 0 ); + EXPECT_EQ( m.getElement( 3, 1 ), 9 ); + EXPECT_EQ( m.getElement( 3, 2 ), 10 ); + EXPECT_EQ( m.getElement( 3, 3 ), 11 ); + EXPECT_EQ( m.getElement( 3, 4 ), 12 ); + EXPECT_EQ( m.getElement( 3, 5 ), 13 ); + + EXPECT_EQ( m.getElement( 4, 0 ), 14 ); + EXPECT_EQ( m.getElement( 4, 1 ), 15 ); + EXPECT_EQ( m.getElement( 4, 2 ), 16 ); + EXPECT_EQ( m.getElement( 4, 3 ), 17 ); + EXPECT_EQ( m.getElement( 4, 4 ), 18 ); + EXPECT_EQ( m.getElement( 4, 5 ), 0 ); + + EXPECT_EQ( m.getElement( 5, 0 ), 19 ); + EXPECT_EQ( m.getElement( 5, 1 ), 20 ); + EXPECT_EQ( m.getElement( 5, 2 ), 0 ); + EXPECT_EQ( m.getElement( 5, 3 ), 0 ); + EXPECT_EQ( m.getElement( 5, 4 ), 0 ); + EXPECT_EQ( m.getElement( 5, 5 ), 0 ); + + EXPECT_EQ( m.getElement( 6, 0 ), 21 ); + EXPECT_EQ( m.getElement( 6, 1 ), 0 ); + EXPECT_EQ( m.getElement( 6, 2 ), 0 ); + EXPECT_EQ( m.getElement( 6, 3 ), 0 ); + EXPECT_EQ( m.getElement( 6, 4 ), 0 ); + EXPECT_EQ( m.getElement( 6, 5 ), 0 ); + + EXPECT_EQ( m.getElement( 7, 0 ), 22 ); + EXPECT_EQ( m.getElement( 7, 1 ), 0 ); + EXPECT_EQ( m.getElement( 7, 2 ), 0 ); + EXPECT_EQ( m.getElement( 7, 3 ), 0 ); + EXPECT_EQ( m.getElement( 7, 4 ), 0 ); + EXPECT_EQ( m.getElement( 7, 5 ), 0 ); + + EXPECT_EQ( m.getElement( 8, 0 ), 23 ); + EXPECT_EQ( m.getElement( 8, 1 ), 24 ); + EXPECT_EQ( m.getElement( 8, 2 ), 25 ); + EXPECT_EQ( m.getElement( 8, 3 ), 26 ); + EXPECT_EQ( m.getElement( 8, 4 ), 27 ); + EXPECT_EQ( m.getElement( 8, 5 ), 0 ); + + EXPECT_EQ( m.getElement( 9, 0 ), 0 ); + EXPECT_EQ( m.getElement( 9, 1 ), 0 ); + EXPECT_EQ( m.getElement( 9, 2 ), 0 ); + EXPECT_EQ( m.getElement( 9, 3 ), 0 ); + EXPECT_EQ( m.getElement( 9, 4 ), 0 ); + EXPECT_EQ( m.getElement( 9, 5 ), 28 ); +} + +/* + * Sets up the following 7x6 sparse matrix: + * + * / 2 1 \ + * | 5 4 3 | + * | 8 7 6 | + * | 11 10 9 | + * | 14 13 
12 | + * | 16 15 | + * \ 17 / + */ +template< typename Matrix > +void setupAntiTriDiagMatrix( Matrix& m ) +{ + const int rows = 7; + const int cols = 6; + m.reset(); + m.setDimensions( rows, cols ); + typename Matrix::CompressedRowLengthsVector rowLengths; + rowLengths.setSize( rows ); + rowLengths.setValue( 3 ); + rowLengths.setElement( 0, 4); + rowLengths.setElement( 1, 4 ); + m.setCompressedRowLengths( rowLengths ); + + int value = 1; + for( int i = 0; i < rows; i++ ) + for( int j = cols - 1; j > 2; j-- ) + if( j - i + 1 < cols && j - i + 1 >= 0 ) + m.setElement( i, j - i + 1, value++ ); +} + +template< typename Matrix > +void checkAntiTriDiagMatrix( Matrix& m ) +{ + ASSERT_EQ( m.getRows(), 7 ); + ASSERT_EQ( m.getColumns(), 6 ); + + EXPECT_EQ( m.getElement( 0, 0 ), 0 ); + EXPECT_EQ( m.getElement( 0, 1 ), 0 ); + EXPECT_EQ( m.getElement( 0, 2 ), 0 ); + EXPECT_EQ( m.getElement( 0, 3 ), 0 ); + EXPECT_EQ( m.getElement( 0, 4 ), 2 ); + EXPECT_EQ( m.getElement( 0, 5 ), 1); + + EXPECT_EQ( m.getElement( 1, 0 ), 0 ); + EXPECT_EQ( m.getElement( 1, 1 ), 0 ); + EXPECT_EQ( m.getElement( 1, 2 ), 0 ); + EXPECT_EQ( m.getElement( 1, 3 ), 5 ); + EXPECT_EQ( m.getElement( 1, 4 ), 4 ); + EXPECT_EQ( m.getElement( 1, 5 ), 3 ); + + EXPECT_EQ( m.getElement( 2, 0 ), 0 ); + EXPECT_EQ( m.getElement( 2, 1 ), 0 ); + EXPECT_EQ( m.getElement( 2, 2 ), 8 ); + EXPECT_EQ( m.getElement( 2, 3 ), 7 ); + EXPECT_EQ( m.getElement( 2, 4 ), 6 ); + EXPECT_EQ( m.getElement( 2, 5 ), 0 ); + + EXPECT_EQ( m.getElement( 3, 0 ), 0 ); + EXPECT_EQ( m.getElement( 3, 1 ), 11 ); + EXPECT_EQ( m.getElement( 3, 2 ), 10 ); + EXPECT_EQ( m.getElement( 3, 3 ), 9 ); + EXPECT_EQ( m.getElement( 3, 4 ), 0 ); + EXPECT_EQ( m.getElement( 3, 5 ), 0 ); + + EXPECT_EQ( m.getElement( 4, 0 ), 14 ); + EXPECT_EQ( m.getElement( 4, 1 ), 13 ); + EXPECT_EQ( m.getElement( 4, 2 ), 12 ); + EXPECT_EQ( m.getElement( 4, 3 ), 0 ); + EXPECT_EQ( m.getElement( 4, 4 ), 0 ); + EXPECT_EQ( m.getElement( 4, 5 ), 0 ); + + EXPECT_EQ( m.getElement( 5, 0 ), 16 ); + EXPECT_EQ( m.getElement( 5, 1 ), 15 ); + EXPECT_EQ( m.getElement( 5, 2 ), 0 ); + EXPECT_EQ( m.getElement( 5, 3 ), 0 ); + EXPECT_EQ( m.getElement( 5, 4 ), 0 ); + EXPECT_EQ( m.getElement( 5, 5 ), 0 ); + + EXPECT_EQ( m.getElement( 6, 0 ), 17 ); + EXPECT_EQ( m.getElement( 6, 1 ), 0 ); + EXPECT_EQ( m.getElement( 6, 2 ), 0 ); + EXPECT_EQ( m.getElement( 6, 3 ), 0 ); + EXPECT_EQ( m.getElement( 6, 4 ), 0 ); + EXPECT_EQ( m.getElement( 6, 5 ), 0 ); +} + +/* + * Sets up the following 7x6 sparse matrix: + * + * / 1 2 \ + * | 3 4 5 | + * | 6 7 8 | + * | 9 10 11 | + * | 12 13 14 | + * | 15 16 | + * \ 17 / + */ +template< typename Matrix > +void setupTriDiagMatrix( Matrix& m ) +{ + const int rows = 7; + const int cols = 6; + m.reset(); + m.setDimensions( rows, cols ); + typename Matrix::CompressedRowLengthsVector rowLengths; + rowLengths.setSize( rows ); + rowLengths.setValue( 3 ); + rowLengths.setElement( 0 , 4 ); + rowLengths.setElement( 1, 4 ); + m.setCompressedRowLengths( rowLengths ); + + int value = 1; + for( int i = 0; i < rows; i++ ) + for( int j = 0; j < 3; j++ ) + if( i + j - 1 >= 0 && i + j - 1 < cols ) + m.setElement( i, i + j - 1, value++ ); +} + +template< typename Matrix > +void checkTriDiagMatrix( Matrix& m ) +{ + ASSERT_EQ( m.getRows(), 7 ); + ASSERT_EQ( m.getColumns(), 6 ); + + EXPECT_EQ( m.getElement( 0, 0 ), 1 ); + EXPECT_EQ( m.getElement( 0, 1 ), 2 ); + EXPECT_EQ( m.getElement( 0, 2 ), 0 ); + EXPECT_EQ( m.getElement( 0, 3 ), 0 ); + EXPECT_EQ( m.getElement( 0, 4 ), 0 ); + EXPECT_EQ( m.getElement( 0, 5 ), 0 ); + + EXPECT_EQ( 
m.getElement( 1, 0 ), 3 ); + EXPECT_EQ( m.getElement( 1, 1 ), 4 ); + EXPECT_EQ( m.getElement( 1, 2 ), 5 ); + EXPECT_EQ( m.getElement( 1, 3 ), 0 ); + EXPECT_EQ( m.getElement( 1, 4 ), 0 ); + EXPECT_EQ( m.getElement( 1, 5 ), 0 ); + + EXPECT_EQ( m.getElement( 2, 0 ), 0 ); + EXPECT_EQ( m.getElement( 2, 1 ), 6 ); + EXPECT_EQ( m.getElement( 2, 2 ), 7 ); + EXPECT_EQ( m.getElement( 2, 3 ), 8 ); + EXPECT_EQ( m.getElement( 2, 4 ), 0 ); + EXPECT_EQ( m.getElement( 2, 5 ), 0 ); + + EXPECT_EQ( m.getElement( 3, 0 ), 0 ); + EXPECT_EQ( m.getElement( 3, 1 ), 0 ); + EXPECT_EQ( m.getElement( 3, 2 ), 9 ); + EXPECT_EQ( m.getElement( 3, 3 ), 10 ); + EXPECT_EQ( m.getElement( 3, 4 ), 11 ); + EXPECT_EQ( m.getElement( 3, 5 ), 0 ); + + EXPECT_EQ( m.getElement( 4, 0 ), 0 ); + EXPECT_EQ( m.getElement( 4, 1 ), 0 ); + EXPECT_EQ( m.getElement( 4, 2 ), 0 ); + EXPECT_EQ( m.getElement( 4, 3 ), 12 ); + EXPECT_EQ( m.getElement( 4, 4 ), 13 ); + EXPECT_EQ( m.getElement( 4, 5 ), 14 ); + + EXPECT_EQ( m.getElement( 5, 0 ), 0 ); + EXPECT_EQ( m.getElement( 5, 1 ), 0 ); + EXPECT_EQ( m.getElement( 5, 2 ), 0 ); + EXPECT_EQ( m.getElement( 5, 3 ), 0 ); + EXPECT_EQ( m.getElement( 5, 4 ), 15 ); + EXPECT_EQ( m.getElement( 5, 5 ), 16 ); + + EXPECT_EQ( m.getElement( 6, 0 ), 0 ); + EXPECT_EQ( m.getElement( 6, 1 ), 0 ); + EXPECT_EQ( m.getElement( 6, 2 ), 0 ); + EXPECT_EQ( m.getElement( 6, 3 ), 0 ); + EXPECT_EQ( m.getElement( 6, 4 ), 0 ); + EXPECT_EQ( m.getElement( 6, 5 ), 17 ); +} + +template< typename Matrix1, typename Matrix2 > +void testCopyAssignment() +{ + { + SCOPED_TRACE("Tri Diagonal Matrix"); + + Matrix1 triDiag1; + setupTriDiagMatrix( triDiag1 ); + checkTriDiagMatrix( triDiag1 ); + + Matrix2 triDiag2; + triDiag2 = triDiag1; + checkTriDiagMatrix( triDiag2 ); + } + { + SCOPED_TRACE("Anti Tri Diagonal Matrix"); + Matrix1 antiTriDiag1; + setupAntiTriDiagMatrix( antiTriDiag1 ); + checkAntiTriDiagMatrix( antiTriDiag1 ); + + Matrix2 antiTriDiag2; + antiTriDiag2 = antiTriDiag1; + checkAntiTriDiagMatrix( antiTriDiag2 ); + } + { + SCOPED_TRACE("Uneven Row Size Matrix"); + Matrix1 unevenRowSize1; + setupUnevenRowSizeMatrix( unevenRowSize1 ); + checkUnevenRowSizeMatrix( unevenRowSize1 ); + + Matrix2 unevenRowSize2; + unevenRowSize2 = unevenRowSize1; + checkUnevenRowSizeMatrix( unevenRowSize2 ); + } +} + +template< typename Matrix1, typename Matrix2 > +void testConversion() +{ + + { + SCOPED_TRACE("Tri Diagonal Matrix"); + + Matrix1 triDiag1; + setupTriDiagMatrix( triDiag1 ); + checkTriDiagMatrix( triDiag1 ); + + Matrix2 triDiag2; + //TNL::Matrices::copySparseMatrix( triDiag2, triDiag1 ); + triDiag2 = triDiag1; + checkTriDiagMatrix( triDiag2 ); + } + + { + SCOPED_TRACE("Anti Tri Diagonal Matrix"); + + Matrix1 antiTriDiag1; + setupAntiTriDiagMatrix( antiTriDiag1 ); + checkAntiTriDiagMatrix( antiTriDiag1 ); + + Matrix2 antiTriDiag2; + //TNL::Matrices::copySparseMatrix( antiTriDiag2, antiTriDiag1 ); + antiTriDiag2 = antiTriDiag1; + checkAntiTriDiagMatrix( antiTriDiag2 ); + } + + { + SCOPED_TRACE("Uneven Row Size Matrix"); + Matrix1 unevenRowSize1; + setupUnevenRowSizeMatrix( unevenRowSize1 ); + checkUnevenRowSizeMatrix( unevenRowSize1 ); + + Matrix2 unevenRowSize2; + //TNL::Matrices::copySparseMatrix( unevenRowSize2, unevenRowSize1 ); + unevenRowSize2 = unevenRowSize1; + checkUnevenRowSizeMatrix( unevenRowSize2 ); + } +} + +TEST( SparseMatrixCopyTest, CSR_HostToHost ) +{ + testCopyAssignment< CSR_host, CSR_host >(); +} + +#ifdef HAVE_CUDA +TEST( SparseMatrixCopyTest, CSR_HostToCuda ) +{ + testCopyAssignment< CSR_host, CSR_cuda >(); +} + +TEST( 
SparseMatrixCopyTest, CSR_CudaToHost ) +{ + testCopyAssignment< CSR_cuda, CSR_host >(); +} + +TEST( SparseMatrixCopyTest, CSR_CudaToCuda ) +{ + testCopyAssignment< CSR_cuda, CSR_cuda >(); +} +#endif + + +TEST( SparseMatrixCopyTest, Ellpack_HostToHost ) +{ + testCopyAssignment< E_host, E_host >(); +} + +#ifdef HAVE_CUDA +TEST( SparseMatrixCopyTest, Ellpack_HostToCuda ) +{ + testCopyAssignment< E_host, E_cuda >(); +} + +TEST( SparseMatrixCopyTest, Ellpack_CudaToHost ) +{ + testCopyAssignment< E_cuda, E_host >(); +} + +TEST( SparseMatrixCopyTest, Ellpack_CudaToCuda ) +{ + testCopyAssignment< E_cuda, E_cuda >(); +} +#endif + + +TEST( SparseMatrixCopyTest, SlicedEllpack_HostToHost ) +{ + testCopyAssignment< SE_host, SE_host >(); +} + +#ifdef HAVE_CUDA +TEST( SparseMatrixCopyTest, SlicedEllpack_HostToCuda ) +{ + testCopyAssignment< SE_host, SE_cuda >(); +} + +TEST( SparseMatrixCopyTest, SlicedEllpack_CudaToHost ) +{ + testCopyAssignment< SE_cuda, SE_host >(); +} + +TEST( SparseMatrixCopyTest, SlicedEllpack_CudaToCuda ) +{ + testCopyAssignment< SE_cuda, SE_cuda >(); +} +#endif + + +// test conversion between formats +TEST( SparseMatrixCopyTest, CSR_to_Ellpack_host ) +{ + testConversion< CSR_host, E_host >(); +} + +TEST( SparseMatrixCopyTest, Ellpack_to_CSR_host ) +{ + testConversion< E_host, CSR_host >(); +} + +TEST( SparseMatrixCopyTest, CSR_to_SlicedEllpack_host ) +{ + testConversion< CSR_host, SE_host >(); +} + +TEST( SparseMatrixCopyTest, SlicedEllpack_to_CSR_host ) +{ + testConversion< SE_host, CSR_host >(); +} + +TEST( SparseMatrixCopyTest, Ellpack_to_SlicedEllpack_host ) +{ + testConversion< E_host, SE_host >(); +} + +TEST( SparseMatrixCopyTest, SlicedEllpack_to_Ellpack_host ) +{ + testConversion< SE_host, E_host >(); +} + +#ifdef HAVE_CUDA +TEST( SparseMatrixCopyTest, CSR_to_Ellpack_cuda ) +{ + testConversion< CSR_cuda, E_cuda >(); +} + +TEST( SparseMatrixCopyTest, Ellpack_to_CSR_cuda ) +{ + testConversion< E_cuda, CSR_cuda >(); +} + +TEST( SparseMatrixCopyTest, CSR_to_SlicedEllpack_cuda ) +{ + testConversion< CSR_cuda, SE_cuda >(); +} + +TEST( SparseMatrixCopyTest, SlicedEllpack_to_CSR_cuda ) +{ + testConversion< SE_cuda, CSR_cuda >(); +} + +TEST( SparseMatrixCopyTest, Ellpack_to_SlicedEllpack_cuda ) +{ + testConversion< E_cuda, SE_cuda >(); +} + +TEST( SparseMatrixCopyTest, SlicedEllpack_to_Ellpack_cuda ) +{ + testConversion< SE_cuda, E_cuda >(); +} +#endif + +#endif + +#include "../../main.h" diff --git a/src/UnitTests/Matrices/SparseMatrixTest.cpp b/src/UnitTests/Matrices/Legacy/SparseMatrixTest.cpp similarity index 100% rename from src/UnitTests/Matrices/SparseMatrixTest.cpp rename to src/UnitTests/Matrices/Legacy/SparseMatrixTest.cpp diff --git a/src/UnitTests/Matrices/SparseMatrixTest.cu b/src/UnitTests/Matrices/Legacy/SparseMatrixTest.cu similarity index 100% rename from src/UnitTests/Matrices/SparseMatrixTest.cu rename to src/UnitTests/Matrices/Legacy/SparseMatrixTest.cu diff --git a/src/UnitTests/Matrices/Legacy/SparseMatrixTest.h b/src/UnitTests/Matrices/Legacy/SparseMatrixTest.h new file mode 100644 index 0000000000000000000000000000000000000000..ed8bec79603b41e291246428cb59b9a040a56744 --- /dev/null +++ b/src/UnitTests/Matrices/Legacy/SparseMatrixTest.h @@ -0,0 +1,39 @@ +/*************************************************************************** + SparseMatrixTest.h - description + ------------------- + begin : Nov 2, 2018 + copyright : (C) 2018 by Tomas Oberhuber et al. 
+ email : tomas.oberhuber@fjfi.cvut.cz + ***************************************************************************/ + +/* See Copyright Notice in tnl/Copyright */ + +#include <TNL/Matrices/Legacy/CSR.h> + +#include "SparseMatrixTest.hpp" +#include <iostream> + +#ifdef HAVE_GTEST +#include <gtest/gtest.h> + +using CSR_host_float = TNL::Matrices::CSR< float, TNL::Devices::Host, int >; +using CSR_host_int = TNL::Matrices::CSR< int, TNL::Devices::Host, int >; + +using CSR_cuda_float = TNL::Matrices::CSR< float, TNL::Devices::Cuda, int >; +using CSR_cuda_int = TNL::Matrices::CSR< int, TNL::Devices::Cuda, int >; + +TEST( SparseMatrixTest, CSR_perforSORIterationTest_Host ) +{ + test_PerformSORIteration< CSR_host_float >(); +} + +#ifdef HAVE_CUDA +TEST( SparseMatrixTest, CSR_perforSORIterationTest_Cuda ) +{ + // test_PerformSORIteration< CSR_cuda_float >(); +} +#endif + +#endif + +#include "../../main.h" diff --git a/src/UnitTests/Matrices/SparseMatrixTest.hpp b/src/UnitTests/Matrices/Legacy/SparseMatrixTest.hpp similarity index 75% rename from src/UnitTests/Matrices/SparseMatrixTest.hpp rename to src/UnitTests/Matrices/Legacy/SparseMatrixTest.hpp index ef5b28d240a65c5e26eb987c42b76688c59a8d87..c6ff5cbd7349ed52e65d794b3a4df0c7915ba8e6 100644 --- a/src/UnitTests/Matrices/SparseMatrixTest.hpp +++ b/src/UnitTests/Matrices/Legacy/SparseMatrixTest.hpp @@ -11,14 +11,15 @@ #include <TNL/Containers/Vector.h> #include <TNL/Containers/VectorView.h> #include <TNL/Math.h> +#include <TNL/Algorithms/ParallelFor.h> #include <iostream> // Temporary, until test_OperatorEquals doesn't work for all formats. -#include <TNL/Matrices/ChunkedEllpack.h> -#include <TNL/Matrices/AdEllpack.h> -#include <TNL/Matrices/BiEllpack.h> +#include <TNL/Matrices/Legacy/ChunkedEllpack.h> +#include <TNL/Matrices/Legacy/AdEllpack.h> +#include <TNL/Matrices/Legacy/BiEllpack.h> -#ifdef HAVE_GTEST +#ifdef HAVE_GTEST #include <gtest/gtest.h> template< typename MatrixHostFloat, typename MatrixHostInt > @@ -36,7 +37,7 @@ void cuda_test_GetType() bool testRan = false; EXPECT_TRUE( testRan ); std::cout << "\nTEST DID NOT RUN. NOT WORKING.\n\n"; - std::cerr << "This test has not been implemented properly yet.\n" << std::endl; + std::cerr << "This test has not been implemented properly yet.\n" << std::endl; } template< typename Matrix > @@ -45,13 +46,13 @@ void test_SetDimensions() using RealType = typename Matrix::RealType; using DeviceType = typename Matrix::DeviceType; using IndexType = typename Matrix::IndexType; - + const IndexType rows = 9; const IndexType cols = 8; - + Matrix m; m.setDimensions( rows, cols ); - + EXPECT_EQ( m.getRows(), 9 ); EXPECT_EQ( m.getColumns(), 8 ); } @@ -62,41 +63,41 @@ void test_SetCompressedRowLengths() using RealType = typename Matrix::RealType; using DeviceType = typename Matrix::DeviceType; using IndexType = typename Matrix::IndexType; - + const IndexType rows = 10; const IndexType cols = 11; - + Matrix m; m.reset(); m.setDimensions( rows, cols ); typename Matrix::CompressedRowLengthsVector rowLengths; rowLengths.setSize( rows ); rowLengths.setValue( 3 ); - + IndexType rowLength = 1; for( IndexType i = 2; i < rows; i++ ) rowLengths.setElement( i, rowLength++ ); - + m.setCompressedRowLengths( rowLengths ); - + // Insert values into the rows. 
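   // The loop above gives row i (for i >= 2) a capacity of i - 1 nonzeros, while rows 0 and 1
   // keep the default capacity of 3. The loops below fill every row with exactly as many values
   // as its capacity, so the getNonZeroRowLength() checks at the end of this test should report
   // those same capacities.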
RealType value = 1; - + for( IndexType i = 0; i < 3; i++ ) // 0th row m.setElement( 0, i, value++ ); - + for( IndexType i = 0; i < 3; i++ ) // 1st row m.setElement( 1, i, value++ ); - + for( IndexType i = 0; i < 1; i++ ) // 2nd row m.setElement( 2, i, value++ ); - + for( IndexType i = 0; i < 2; i++ ) // 3rd row m.setElement( 3, i, value++ ); - + for( IndexType i = 0; i < 3; i++ ) // 4th row m.setElement( 4, i, value++ ); - + for( IndexType i = 0; i < 4; i++ ) // 5th row m.setElement( 5, i, value++ ); @@ -111,8 +112,8 @@ void test_SetCompressedRowLengths() for( IndexType i = 0; i < 8; i++ ) // 9th row m.setElement( 9, i, value++ ); - - + + EXPECT_EQ( m.getNonZeroRowLength( 0 ), 3 ); EXPECT_EQ( m.getNonZeroRowLength( 1 ), 3 ); EXPECT_EQ( m.getNonZeroRowLength( 2 ), 1 ); @@ -131,32 +132,101 @@ void test_SetLike() using RealType = typename Matrix1::RealType; using DeviceType = typename Matrix1::DeviceType; using IndexType = typename Matrix1::IndexType; - + const IndexType rows = 8; const IndexType cols = 7; - + Matrix1 m1; m1.reset(); m1.setDimensions( rows + 1, cols + 2 ); - + Matrix2 m2; m2.reset(); m2.setDimensions( rows, cols ); - + m1.setLike( m2 ); - - + + EXPECT_EQ( m1.getRows(), m2.getRows() ); EXPECT_EQ( m1.getColumns(), m2.getColumns() ); } +template< typename Matrix > +void test_GetNumberOfNonzeroMatrixElements() +{ + using RealType = typename Matrix::RealType; + using DeviceType = typename Matrix::DeviceType; + using IndexType = typename Matrix::IndexType; + + /* + * Sets up the following 10x10 sparse matrix: + * + * / 1 0 2 0 3 0 4 0 0 0 \ + * | 5 6 7 0 0 0 0 0 0 0 | + * | 8 9 10 11 12 13 14 15 0 0 | + * | 16 17 0 0 0 0 0 0 0 0 | + * | 18 0 0 0 0 0 0 0 0 0 | + * | 19 0 0 0 0 0 0 0 0 0 | + * | 20 0 0 0 0 0 0 0 0 0 | + * | 21 0 0 0 0 0 0 0 0 0 | + * | 22 23 24 25 26 27 28 29 30 31 | + * \ 32 33 34 35 36 37 38 39 40 41 / + */ + + const IndexType rows = 10; + const IndexType cols = 10; + + Matrix m; + m.reset(); + + m.setDimensions( rows, cols ); + + typename Matrix::CompressedRowLengthsVector rowLengths; + rowLengths.setSize( rows ); + rowLengths.setElement( 0, 4 ); + rowLengths.setElement( 1, 3 ); + rowLengths.setElement( 2, 8 ); + rowLengths.setElement( 3, 2 ); + for( IndexType i = 4; i < rows - 2; i++ ) + { + rowLengths.setElement( i, 1 ); + } + rowLengths.setElement( 8, 10 ); + rowLengths.setElement( 9, 10 ); + m.setCompressedRowLengths( rowLengths ); + + RealType value = 1; + for( IndexType i = 0; i < 4; i++ ) + m.setElement( 0, 2 * i, value++ ); + + for( IndexType i = 0; i < 3; i++ ) + m.setElement( 1, i, value++ ); + + for( IndexType i = 0; i < 8; i++ ) + m.setElement( 2, i, value++ ); + + for( IndexType i = 0; i < 2; i++ ) + m.setElement( 3, i, value++ ); + + for( IndexType i = 4; i < 8; i++ ) + m.setElement( i, 0, value++ ); + + for( IndexType j = 8; j < rows; j++) + { + for( IndexType i = 0; i < cols; i++ ) + m.setElement( j, i, value++ ); + } + + EXPECT_EQ( m.getNumberOfNonzeroMatrixElements(), 41 ); +} + template< typename Matrix > void test_Reset() { using RealType = typename Matrix::RealType; using DeviceType = typename Matrix::DeviceType; using IndexType = typename Matrix::IndexType; - + /* * Sets up the following 5x4 sparse matrix: * @@ -166,27 +236,253 @@ void test_Reset() * | 0 0 0 0 | * \ 0 0 0 0 / */ - + const IndexType rows = 5; const IndexType cols = 4; - + Matrix m; m.setDimensions( rows, cols ); - + m.reset(); - - + + EXPECT_EQ( m.getRows(), 0 ); EXPECT_EQ( m.getColumns(), 0 ); } +template< typename Matrix > +void test_GetRow() +{ + using RealType = typename 
Matrix::RealType; + using DeviceType = typename Matrix::DeviceType; + using IndexType = typename Matrix::IndexType; + +/* + * Sets up the following 10x10 sparse matrix: + * + * / 1 0 2 0 3 0 4 0 0 0 \ + * | 5 6 7 0 0 0 0 0 0 0 | + * | 8 9 10 11 12 13 14 15 0 0 | + * | 16 17 0 0 0 0 0 0 0 0 | + * | 18 0 0 0 0 0 0 0 0 0 | + * | 19 0 0 0 0 0 0 0 0 0 | + * | 20 0 0 0 0 0 0 0 0 0 | + * | 21 0 0 0 0 0 0 0 0 0 | + * | 22 23 24 25 26 27 28 29 30 31 | + * \ 32 33 34 35 36 37 38 39 40 41 / + */ + + const IndexType rows = 10; + const IndexType cols = 10; + + Matrix m( rows, cols ); + + typename Matrix::CompressedRowLengthsVector rowLengths; + rowLengths.setSize( rows ); + rowLengths.setElement( 0, 4 ); + rowLengths.setElement( 1, 3 ); + rowLengths.setElement( 2, 8 ); + rowLengths.setElement( 3, 2 ); + for( IndexType i = 4; i < rows - 2; i++ ) + { + rowLengths.setElement( i, 1 ); + } + rowLengths.setElement( 8, 10 ); + rowLengths.setElement( 9, 10 ); + m.setCompressedRowLengths( rowLengths ); + + /*RealType value = 1; + for( IndexType i = 0; i < 4; i++ ) + m.setElement( 0, 2 * i, value++ ); + + for( IndexType i = 0; i < 3; i++ ) + m.setElement( 1, i, value++ ); + + for( IndexType i = 0; i < 8; i++ ) + m.setElement( 2, i, value++ ); + + for( IndexType i = 0; i < 2; i++ ) + m.setElement( 3, i, value++ ); + + for( IndexType i = 4; i < 8; i++ ) + m.setElement( i, 0, value++ ); + + for( IndexType j = 8; j < rows; j++) + { + for( IndexType i = 0; i < cols; i++ ) + m.setElement( j, i, value++ ); + }*/ + auto matrixView = m.getView(); + auto f = [=] __cuda_callable__ ( const IndexType rowIdx ) mutable { + auto row = matrixView.getRow( rowIdx ); + RealType val; + switch( rowIdx ) + { + case 0: + val = 1; + for( IndexType i = 0; i < 4; i++ ) + row.setElement( i, 2 * i, val++ ); + break; + case 1: + val = 5; + for( IndexType i = 0; i < 3; i++ ) + row.setElement( i, i, val++ ); + break; + case 2: + val = 8; + for( IndexType i = 0; i < 8; i++ ) + row.setElement( i, i, val++ ); + break; + case 3: + val = 16; + for( IndexType i = 0; i < 2; i++ ) + row.setElement( i, i, val++ ); + break; + case 4: + row.setElement( 0, 0, 18 ); + break; + case 5: + row.setElement( 0, 0, 19 ); + break; + case 6: + row.setElement( 0, 0, 20 ); + break; + case 7: + row.setElement( 0, 0, 21 ); + break; + case 8: + val = 22; + for( IndexType i = 0; i < rows; i++ ) + row.setElement( i, i, val++ ); + break; + case 9: + val = 32; + for( IndexType i = 0; i < rows; i++ ) + row.setElement( i, i, val++ ); + break; + } + }; + TNL::Algorithms::ParallelFor< DeviceType >::exec( ( IndexType ) 0, rows, f ); + + EXPECT_EQ( m.getElement( 0, 0 ), 1 ); + EXPECT_EQ( m.getElement( 0, 1 ), 0 ); + EXPECT_EQ( m.getElement( 0, 2 ), 2 ); + EXPECT_EQ( m.getElement( 0, 3 ), 0 ); + EXPECT_EQ( m.getElement( 0, 4 ), 3 ); + EXPECT_EQ( m.getElement( 0, 5 ), 0 ); + EXPECT_EQ( m.getElement( 0, 6 ), 4 ); + EXPECT_EQ( m.getElement( 0, 7 ), 0 ); + EXPECT_EQ( m.getElement( 0, 8 ), 0 ); + EXPECT_EQ( m.getElement( 0, 9 ), 0 ); + + EXPECT_EQ( m.getElement( 1, 0 ), 5 ); + EXPECT_EQ( m.getElement( 1, 1 ), 6 ); + EXPECT_EQ( m.getElement( 1, 2 ), 7 ); + EXPECT_EQ( m.getElement( 1, 3 ), 0 ); + EXPECT_EQ( m.getElement( 1, 4 ), 0 ); + EXPECT_EQ( m.getElement( 1, 5 ), 0 ); + EXPECT_EQ( m.getElement( 1, 6 ), 0 ); + EXPECT_EQ( m.getElement( 1, 7 ), 0 ); + EXPECT_EQ( m.getElement( 1, 8 ), 0 ); + EXPECT_EQ( m.getElement( 1, 9 ), 0 ); + + EXPECT_EQ( m.getElement( 2, 0 ), 8 ); + EXPECT_EQ( m.getElement( 2, 1 ), 9 ); + EXPECT_EQ( m.getElement( 2, 2 ), 10 ); + EXPECT_EQ( m.getElement( 2, 3 ), 11 
); + EXPECT_EQ( m.getElement( 2, 4 ), 12 ); + EXPECT_EQ( m.getElement( 2, 5 ), 13 ); + EXPECT_EQ( m.getElement( 2, 6 ), 14 ); + EXPECT_EQ( m.getElement( 2, 7 ), 15 ); + EXPECT_EQ( m.getElement( 2, 8 ), 0 ); + EXPECT_EQ( m.getElement( 2, 9 ), 0 ); + + EXPECT_EQ( m.getElement( 3, 0 ), 16 ); + EXPECT_EQ( m.getElement( 3, 1 ), 17 ); + EXPECT_EQ( m.getElement( 3, 2 ), 0 ); + EXPECT_EQ( m.getElement( 3, 3 ), 0 ); + EXPECT_EQ( m.getElement( 3, 4 ), 0 ); + EXPECT_EQ( m.getElement( 3, 5 ), 0 ); + EXPECT_EQ( m.getElement( 3, 6 ), 0 ); + EXPECT_EQ( m.getElement( 3, 7 ), 0 ); + EXPECT_EQ( m.getElement( 3, 8 ), 0 ); + EXPECT_EQ( m.getElement( 3, 9 ), 0 ); + + EXPECT_EQ( m.getElement( 4, 0 ), 18 ); + EXPECT_EQ( m.getElement( 4, 1 ), 0 ); + EXPECT_EQ( m.getElement( 4, 2 ), 0 ); + EXPECT_EQ( m.getElement( 4, 3 ), 0 ); + EXPECT_EQ( m.getElement( 4, 4 ), 0 ); + EXPECT_EQ( m.getElement( 4, 5 ), 0 ); + EXPECT_EQ( m.getElement( 4, 6 ), 0 ); + EXPECT_EQ( m.getElement( 4, 7 ), 0 ); + EXPECT_EQ( m.getElement( 4, 8 ), 0 ); + EXPECT_EQ( m.getElement( 4, 9 ), 0 ); + + EXPECT_EQ( m.getElement( 5, 0 ), 19 ); + EXPECT_EQ( m.getElement( 5, 1 ), 0 ); + EXPECT_EQ( m.getElement( 5, 2 ), 0 ); + EXPECT_EQ( m.getElement( 5, 3 ), 0 ); + EXPECT_EQ( m.getElement( 5, 4 ), 0 ); + EXPECT_EQ( m.getElement( 5, 5 ), 0 ); + EXPECT_EQ( m.getElement( 5, 6 ), 0 ); + EXPECT_EQ( m.getElement( 5, 7 ), 0 ); + EXPECT_EQ( m.getElement( 5, 8 ), 0 ); + EXPECT_EQ( m.getElement( 5, 9 ), 0 ); + + EXPECT_EQ( m.getElement( 6, 0 ), 20 ); + EXPECT_EQ( m.getElement( 6, 1 ), 0 ); + EXPECT_EQ( m.getElement( 6, 2 ), 0 ); + EXPECT_EQ( m.getElement( 6, 3 ), 0 ); + EXPECT_EQ( m.getElement( 6, 4 ), 0 ); + EXPECT_EQ( m.getElement( 6, 5 ), 0 ); + EXPECT_EQ( m.getElement( 6, 6 ), 0 ); + EXPECT_EQ( m.getElement( 6, 7 ), 0 ); + EXPECT_EQ( m.getElement( 6, 8 ), 0 ); + EXPECT_EQ( m.getElement( 6, 9 ), 0 ); + + EXPECT_EQ( m.getElement( 7, 0 ), 21 ); + EXPECT_EQ( m.getElement( 7, 1 ), 0 ); + EXPECT_EQ( m.getElement( 7, 2 ), 0 ); + EXPECT_EQ( m.getElement( 7, 3 ), 0 ); + EXPECT_EQ( m.getElement( 7, 4 ), 0 ); + EXPECT_EQ( m.getElement( 7, 5 ), 0 ); + EXPECT_EQ( m.getElement( 7, 6 ), 0 ); + EXPECT_EQ( m.getElement( 7, 7 ), 0 ); + EXPECT_EQ( m.getElement( 7, 8 ), 0 ); + EXPECT_EQ( m.getElement( 7, 9 ), 0 ); + + EXPECT_EQ( m.getElement( 8, 0 ), 22 ); + EXPECT_EQ( m.getElement( 8, 1 ), 23 ); + EXPECT_EQ( m.getElement( 8, 2 ), 24 ); + EXPECT_EQ( m.getElement( 8, 3 ), 25 ); + EXPECT_EQ( m.getElement( 8, 4 ), 26 ); + EXPECT_EQ( m.getElement( 8, 5 ), 27 ); + EXPECT_EQ( m.getElement( 8, 6 ), 28 ); + EXPECT_EQ( m.getElement( 8, 7 ), 29 ); + EXPECT_EQ( m.getElement( 8, 8 ), 30 ); + EXPECT_EQ( m.getElement( 8, 9 ), 31 ); + + EXPECT_EQ( m.getElement( 9, 0 ), 32 ); + EXPECT_EQ( m.getElement( 9, 1 ), 33 ); + EXPECT_EQ( m.getElement( 9, 2 ), 34 ); + EXPECT_EQ( m.getElement( 9, 3 ), 35 ); + EXPECT_EQ( m.getElement( 9, 4 ), 36 ); + EXPECT_EQ( m.getElement( 9, 5 ), 37 ); + EXPECT_EQ( m.getElement( 9, 6 ), 38 ); + EXPECT_EQ( m.getElement( 9, 7 ), 39 ); + EXPECT_EQ( m.getElement( 9, 8 ), 40 ); + EXPECT_EQ( m.getElement( 9, 9 ), 41 ); +} + + template< typename Matrix > void test_SetElement() { using RealType = typename Matrix::RealType; using DeviceType = typename Matrix::DeviceType; using IndexType = typename Matrix::IndexType; - + /* * Sets up the following 10x10 sparse matrix: * @@ -201,15 +497,15 @@ void test_SetElement() * | 22 23 24 25 26 27 28 29 30 31 | * \ 32 33 34 35 36 37 38 39 40 41 / */ - + const IndexType rows = 10; const IndexType cols = 10; - + Matrix m; m.reset(); - + 
m.setDimensions( rows, cols ); - + typename Matrix::CompressedRowLengthsVector rowLengths; rowLengths.setSize( rows ); rowLengths.setElement( 0, 4 ); @@ -223,29 +519,29 @@ void test_SetElement() rowLengths.setElement( 8, 10 ); rowLengths.setElement( 9, 10 ); m.setCompressedRowLengths( rowLengths ); - + RealType value = 1; for( IndexType i = 0; i < 4; i++ ) m.setElement( 0, 2 * i, value++ ); - + for( IndexType i = 0; i < 3; i++ ) m.setElement( 1, i, value++ ); - + for( IndexType i = 0; i < 8; i++ ) m.setElement( 2, i, value++ ); - + for( IndexType i = 0; i < 2; i++ ) m.setElement( 3, i, value++ ); - + for( IndexType i = 4; i < 8; i++ ) m.setElement( i, 0, value++ ); - + for( IndexType j = 8; j < rows; j++) { for( IndexType i = 0; i < cols; i++ ) m.setElement( j, i, value++ ); } - + EXPECT_EQ( m.getElement( 0, 0 ), 1 ); EXPECT_EQ( m.getElement( 0, 1 ), 0 ); EXPECT_EQ( m.getElement( 0, 2 ), 2 ); @@ -256,7 +552,7 @@ void test_SetElement() EXPECT_EQ( m.getElement( 0, 7 ), 0 ); EXPECT_EQ( m.getElement( 0, 8 ), 0 ); EXPECT_EQ( m.getElement( 0, 9 ), 0 ); - + EXPECT_EQ( m.getElement( 1, 0 ), 5 ); EXPECT_EQ( m.getElement( 1, 1 ), 6 ); EXPECT_EQ( m.getElement( 1, 2 ), 7 ); @@ -267,7 +563,7 @@ void test_SetElement() EXPECT_EQ( m.getElement( 1, 7 ), 0 ); EXPECT_EQ( m.getElement( 1, 8 ), 0 ); EXPECT_EQ( m.getElement( 1, 9 ), 0 ); - + EXPECT_EQ( m.getElement( 2, 0 ), 8 ); EXPECT_EQ( m.getElement( 2, 1 ), 9 ); EXPECT_EQ( m.getElement( 2, 2 ), 10 ); @@ -278,7 +574,7 @@ void test_SetElement() EXPECT_EQ( m.getElement( 2, 7 ), 15 ); EXPECT_EQ( m.getElement( 2, 8 ), 0 ); EXPECT_EQ( m.getElement( 2, 9 ), 0 ); - + EXPECT_EQ( m.getElement( 3, 0 ), 16 ); EXPECT_EQ( m.getElement( 3, 1 ), 17 ); EXPECT_EQ( m.getElement( 3, 2 ), 0 ); @@ -289,7 +585,7 @@ void test_SetElement() EXPECT_EQ( m.getElement( 3, 7 ), 0 ); EXPECT_EQ( m.getElement( 3, 8 ), 0 ); EXPECT_EQ( m.getElement( 3, 9 ), 0 ); - + EXPECT_EQ( m.getElement( 4, 0 ), 18 ); EXPECT_EQ( m.getElement( 4, 1 ), 0 ); EXPECT_EQ( m.getElement( 4, 2 ), 0 ); @@ -300,7 +596,7 @@ void test_SetElement() EXPECT_EQ( m.getElement( 4, 7 ), 0 ); EXPECT_EQ( m.getElement( 4, 8 ), 0 ); EXPECT_EQ( m.getElement( 4, 9 ), 0 ); - + EXPECT_EQ( m.getElement( 5, 0 ), 19 ); EXPECT_EQ( m.getElement( 5, 1 ), 0 ); EXPECT_EQ( m.getElement( 5, 2 ), 0 ); @@ -311,7 +607,7 @@ void test_SetElement() EXPECT_EQ( m.getElement( 5, 7 ), 0 ); EXPECT_EQ( m.getElement( 5, 8 ), 0 ); EXPECT_EQ( m.getElement( 5, 9 ), 0 ); - + EXPECT_EQ( m.getElement( 6, 0 ), 20 ); EXPECT_EQ( m.getElement( 6, 1 ), 0 ); EXPECT_EQ( m.getElement( 6, 2 ), 0 ); @@ -322,7 +618,7 @@ void test_SetElement() EXPECT_EQ( m.getElement( 6, 7 ), 0 ); EXPECT_EQ( m.getElement( 6, 8 ), 0 ); EXPECT_EQ( m.getElement( 6, 9 ), 0 ); - + EXPECT_EQ( m.getElement( 7, 0 ), 21 ); EXPECT_EQ( m.getElement( 7, 1 ), 0 ); EXPECT_EQ( m.getElement( 7, 2 ), 0 ); @@ -333,7 +629,7 @@ void test_SetElement() EXPECT_EQ( m.getElement( 7, 7 ), 0 ); EXPECT_EQ( m.getElement( 7, 8 ), 0 ); EXPECT_EQ( m.getElement( 7, 9 ), 0 ); - + EXPECT_EQ( m.getElement( 8, 0 ), 22 ); EXPECT_EQ( m.getElement( 8, 1 ), 23 ); EXPECT_EQ( m.getElement( 8, 2 ), 24 ); @@ -344,7 +640,7 @@ void test_SetElement() EXPECT_EQ( m.getElement( 8, 7 ), 29 ); EXPECT_EQ( m.getElement( 8, 8 ), 30 ); EXPECT_EQ( m.getElement( 8, 9 ), 31 ); - + EXPECT_EQ( m.getElement( 9, 0 ), 32 ); EXPECT_EQ( m.getElement( 9, 1 ), 33 ); EXPECT_EQ( m.getElement( 9, 2 ), 34 ); @@ -363,7 +659,7 @@ void test_AddElement() using RealType = typename Matrix::RealType; using DeviceType = typename Matrix::DeviceType; using IndexType = 
typename Matrix::IndexType; - + /* * Sets up the following 6x5 sparse matrix: * @@ -374,10 +670,10 @@ void test_AddElement() * | 0 11 0 0 0 | * \ 0 0 0 12 0 / */ - + const IndexType rows = 6; const IndexType cols = 5; - + Matrix m; m.reset(); m.setDimensions( rows, cols ); @@ -385,61 +681,61 @@ void test_AddElement() rowLengths.setSize( rows ); rowLengths.setValue( 3 ); m.setCompressedRowLengths( rowLengths ); - + RealType value = 1; for( IndexType i = 0; i < cols - 2; i++ ) // 0th row m.setElement( 0, i, value++ ); - + for( IndexType i = 1; i < cols - 1; i++ ) // 1st row m.setElement( 1, i, value++ ); - + for( IndexType i = 2; i < cols; i++ ) // 2nd row m.setElement( 2, i, value++ ); - + m.setElement( 3, 0, value++ ); // 3rd row - + m.setElement( 4, 1, value++ ); // 4th row - + m.setElement( 5, 3, value++ ); // 5th row - - + + // Check the set elements EXPECT_EQ( m.getElement( 0, 0 ), 1 ); EXPECT_EQ( m.getElement( 0, 1 ), 2 ); EXPECT_EQ( m.getElement( 0, 2 ), 3 ); EXPECT_EQ( m.getElement( 0, 3 ), 0 ); EXPECT_EQ( m.getElement( 0, 4 ), 0 ); - + EXPECT_EQ( m.getElement( 1, 0 ), 0 ); EXPECT_EQ( m.getElement( 1, 1 ), 4 ); EXPECT_EQ( m.getElement( 1, 2 ), 5 ); EXPECT_EQ( m.getElement( 1, 3 ), 6 ); EXPECT_EQ( m.getElement( 1, 4 ), 0 ); - + EXPECT_EQ( m.getElement( 2, 0 ), 0 ); EXPECT_EQ( m.getElement( 2, 1 ), 0 ); EXPECT_EQ( m.getElement( 2, 2 ), 7 ); EXPECT_EQ( m.getElement( 2, 3 ), 8 ); EXPECT_EQ( m.getElement( 2, 4 ), 9 ); - + EXPECT_EQ( m.getElement( 3, 0 ), 10 ); EXPECT_EQ( m.getElement( 3, 1 ), 0 ); EXPECT_EQ( m.getElement( 3, 2 ), 0 ); EXPECT_EQ( m.getElement( 3, 3 ), 0 ); EXPECT_EQ( m.getElement( 3, 4 ), 0 ); - + EXPECT_EQ( m.getElement( 4, 0 ), 0 ); EXPECT_EQ( m.getElement( 4, 1 ), 11 ); EXPECT_EQ( m.getElement( 4, 2 ), 0 ); EXPECT_EQ( m.getElement( 4, 3 ), 0 ); EXPECT_EQ( m.getElement( 4, 4 ), 0 ); - + EXPECT_EQ( m.getElement( 5, 0 ), 0 ); EXPECT_EQ( m.getElement( 5, 1 ), 0 ); EXPECT_EQ( m.getElement( 5, 2 ), 0 ); EXPECT_EQ( m.getElement( 5, 3 ), 12 ); EXPECT_EQ( m.getElement( 5, 4 ), 0 ); - + // Add new elements to the old elements with a multiplying factor applied to the old elements. 
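   // I.e. addElement( row, column, value, factor ) stores factor * oldValue + value; for example,
   // element ( 0, 0 ) was 1, so addElement( 0, 0, 1, 2.0 ) gives 2.0 * 1 + 1 = 3, and element
   // ( 3, 1 ) was 0, so addElement( 3, 1, 11, 2.0 ) gives 11, matching the expected matrix below.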
/* @@ -452,7 +748,7 @@ void test_AddElement() * | 0 11 0 0 0 | * \ 0 0 0 12 0 / */ - + /* * The following setup results in the following 6x5 sparse matrix: * @@ -463,57 +759,57 @@ void test_AddElement() * | 0 35 14 15 0 | * \ 0 0 16 41 18 / */ - + RealType newValue = 1; for( IndexType i = 0; i < cols - 2; i++ ) // 0th row m.addElement( 0, i, newValue++, 2.0 ); - + for( IndexType i = 1; i < cols - 1; i++ ) // 1st row m.addElement( 1, i, newValue++, 2.0 ); - + for( IndexType i = 2; i < cols; i++ ) // 2nd row m.addElement( 2, i, newValue++, 2.0 ); - + for( IndexType i = 0; i < cols - 2; i++ ) // 3rd row m.addElement( 3, i, newValue++, 2.0 ); - + for( IndexType i = 1; i < cols - 1; i++ ) // 4th row m.addElement( 4, i, newValue++, 2.0 ); - + for( IndexType i = 2; i < cols; i++ ) // 5th row m.addElement( 5, i, newValue++, 2.0 ); - - + + EXPECT_EQ( m.getElement( 0, 0 ), 3 ); EXPECT_EQ( m.getElement( 0, 1 ), 6 ); EXPECT_EQ( m.getElement( 0, 2 ), 9 ); EXPECT_EQ( m.getElement( 0, 3 ), 0 ); EXPECT_EQ( m.getElement( 0, 4 ), 0 ); - + EXPECT_EQ( m.getElement( 1, 0 ), 0 ); EXPECT_EQ( m.getElement( 1, 1 ), 12 ); EXPECT_EQ( m.getElement( 1, 2 ), 15 ); EXPECT_EQ( m.getElement( 1, 3 ), 18 ); EXPECT_EQ( m.getElement( 1, 4 ), 0 ); - + EXPECT_EQ( m.getElement( 2, 0 ), 0 ); EXPECT_EQ( m.getElement( 2, 1 ), 0 ); EXPECT_EQ( m.getElement( 2, 2 ), 21 ); EXPECT_EQ( m.getElement( 2, 3 ), 24 ); EXPECT_EQ( m.getElement( 2, 4 ), 27 ); - + EXPECT_EQ( m.getElement( 3, 0 ), 30 ); EXPECT_EQ( m.getElement( 3, 1 ), 11 ); EXPECT_EQ( m.getElement( 3, 2 ), 12 ); EXPECT_EQ( m.getElement( 3, 3 ), 0 ); EXPECT_EQ( m.getElement( 3, 4 ), 0 ); - + EXPECT_EQ( m.getElement( 4, 0 ), 0 ); EXPECT_EQ( m.getElement( 4, 1 ), 35 ); EXPECT_EQ( m.getElement( 4, 2 ), 14 ); EXPECT_EQ( m.getElement( 4, 3 ), 15 ); EXPECT_EQ( m.getElement( 4, 4 ), 0 ); - + EXPECT_EQ( m.getElement( 5, 0 ), 0 ); EXPECT_EQ( m.getElement( 5, 1 ), 0 ); EXPECT_EQ( m.getElement( 5, 2 ), 16 ); @@ -527,7 +823,7 @@ void test_SetRow() using RealType = typename Matrix::RealType; using DeviceType = typename Matrix::DeviceType; using IndexType = typename Matrix::IndexType; - + /* * Sets up the following 3x7 sparse matrix: * @@ -535,10 +831,10 @@ void test_SetRow() * | 2 2 2 0 0 0 0 | * \ 3 3 3 0 0 0 0 / */ - + const IndexType rows = 3; const IndexType cols = 7; - + Matrix m; m.reset(); m.setDimensions( rows, cols ); @@ -547,7 +843,7 @@ void test_SetRow() rowLengths.setValue( 6 ); rowLengths.setElement( 1, 3 ); m.setCompressedRowLengths( rowLengths ); - + RealType value = 1; for( IndexType i = 0; i < 3; i++ ) { @@ -555,19 +851,19 @@ void test_SetRow() m.setElement( 1, i, value + 1 ); m.setElement( 2, i, value + 2 ); } - + RealType row1 [ 3 ] = { 11, 11, 11 }; IndexType colIndexes1 [ 3 ] = { 0, 1, 2 }; RealType row2 [ 3 ] = { 22, 22, 22 }; IndexType colIndexes2 [ 3 ] = { 0, 1, 2 }; RealType row3 [ 3 ] = { 33, 33, 33 }; IndexType colIndexes3 [ 3 ] = { 3, 4, 5 }; - + RealType row = 0; IndexType elements = 3; - + m.setRow( row++, colIndexes1, row1, elements ); m.setRow( row++, colIndexes2, row2, elements ); m.setRow( row++, colIndexes3, row3, elements ); - - + + EXPECT_EQ( m.getElement( 0, 0 ), 11 ); EXPECT_EQ( m.getElement( 0, 1 ), 11 ); EXPECT_EQ( m.getElement( 0, 2 ), 11 ); @@ -575,7 +871,7 @@ void test_SetRow() EXPECT_EQ( m.getElement( 0, 4 ), 0 ); EXPECT_EQ( m.getElement( 0, 5 ), 0 ); EXPECT_EQ( m.getElement( 0, 6 ), 0 ); - + EXPECT_EQ( m.getElement( 1, 0 ), 22 ); EXPECT_EQ( m.getElement( 1, 1 ), 22 ); EXPECT_EQ( m.getElement( 1, 2 ), 22 ); @@ -583,7 +879,7 @@ void test_SetRow() 
EXPECT_EQ( m.getElement( 1, 4 ), 0 ); EXPECT_EQ( m.getElement( 1, 5 ), 0 ); EXPECT_EQ( m.getElement( 1, 6 ), 0 ); - + EXPECT_EQ( m.getElement( 2, 0 ), 0 ); EXPECT_EQ( m.getElement( 2, 1 ), 0 ); EXPECT_EQ( m.getElement( 2, 2 ), 0 ); @@ -600,7 +896,7 @@ void test_VectorProduct() using DeviceType = typename Matrix::DeviceType; using IndexType = typename Matrix::IndexType; using VectorType = TNL::Containers::Vector< RealType, DeviceType, IndexType >; - + /* * Sets up the following 4x4 sparse matrix: * @@ -609,10 +905,10 @@ void test_VectorProduct() * | 0 4 0 0 | * \ 0 0 5 0 / */ - + const IndexType m_rows_1 = 4; const IndexType m_cols_1 = 4; - + Matrix m_1; m_1.reset(); m_1.setDimensions( m_rows_1, m_cols_1 ); @@ -623,37 +919,37 @@ void test_VectorProduct() rowLengths_1.setElement( 2, 1 ); rowLengths_1.setElement( 3, 1 ); m_1.setCompressedRowLengths( rowLengths_1 ); - + RealType value_1 = 1; m_1.setElement( 0, 0, value_1++ ); // 0th row - + m_1.setElement( 1, 1, value_1++ ); // 1st row m_1.setElement( 1, 3, value_1++ ); - + m_1.setElement( 2, 1, value_1++ ); // 2nd row - + m_1.setElement( 3, 2, value_1++ ); // 3rd row - + VectorType inVector_1; inVector_1.setSize( m_cols_1 ); - for( IndexType i = 0; i < inVector_1.getSize(); i++ ) + for( IndexType i = 0; i < inVector_1.getSize(); i++ ) inVector_1.setElement( i, 2 ); - VectorType outVector_1; + VectorType outVector_1; outVector_1.setSize( m_rows_1 ); for( IndexType j = 0; j < outVector_1.getSize(); j++ ) outVector_1.setElement( j, 0 ); - - + + m_1.vectorProduct( inVector_1, outVector_1 ); - - + + EXPECT_EQ( outVector_1.getElement( 0 ), 2 ); EXPECT_EQ( outVector_1.getElement( 1 ), 10 ); EXPECT_EQ( outVector_1.getElement( 2 ), 8 ); EXPECT_EQ( outVector_1.getElement( 3 ), 10 ); - - + + /* * Sets up the following 4x4 sparse matrix: * @@ -662,10 +958,10 @@ void test_VectorProduct() * | 5 6 7 0 | * \ 0 8 0 0 / */ - + const IndexType m_rows_2 = 4; const IndexType m_cols_2 = 4; - + Matrix m_2; m_2.reset(); m_2.setDimensions( m_rows_2, m_cols_2 ); @@ -675,39 +971,39 @@ void test_VectorProduct() rowLengths_2.setElement( 1, 1 ); rowLengths_2.setElement( 3, 1 ); m_2.setCompressedRowLengths( rowLengths_2 ); - + RealType value_2 = 1; for( IndexType i = 0; i < 3; i++ ) // 0th row m_2.setElement( 0, i, value_2++ ); - + m_2.setElement( 1, 3, value_2++ ); // 1st row - + for( IndexType i = 0; i < 3; i++ ) // 2nd row m_2.setElement( 2, i, value_2++ ); - + for( IndexType i = 1; i < 2; i++ ) // 3rd row m_2.setElement( 3, i, value_2++ ); - + VectorType inVector_2; inVector_2.setSize( m_cols_2 ); - for( IndexType i = 0; i < inVector_2.getSize(); i++ ) + for( IndexType i = 0; i < inVector_2.getSize(); i++ ) inVector_2.setElement( i, 2 ); - VectorType outVector_2; + VectorType outVector_2; outVector_2.setSize( m_rows_2 ); for( IndexType j = 0; j < outVector_2.getSize(); j++ ) outVector_2.setElement( j, 0 ); - - + + m_2.vectorProduct( inVector_2, outVector_2 ); - - + + EXPECT_EQ( outVector_2.getElement( 0 ), 12 ); EXPECT_EQ( outVector_2.getElement( 1 ), 8 ); EXPECT_EQ( outVector_2.getElement( 2 ), 36 ); EXPECT_EQ( outVector_2.getElement( 3 ), 16 ); - - + + /* * Sets up the following 4x4 sparse matrix: * @@ -716,10 +1012,10 @@ void test_VectorProduct() * | 7 8 9 0 | * \ 0 10 11 12 / */ - + const IndexType m_rows_3 = 4; const IndexType m_cols_3 = 4; - + Matrix m_3; m_3.reset(); m_3.setDimensions( m_rows_3, m_cols_3 ); @@ -727,40 +1023,40 @@ void test_VectorProduct() rowLengths_3.setSize( m_rows_3 ); rowLengths_3.setValue( 3 ); m_3.setCompressedRowLengths( rowLengths_3 ); - 
+ RealType value_3 = 1; for( IndexType i = 0; i < 3; i++ ) // 0th row m_3.setElement( 0, i, value_3++ ); - + for( IndexType i = 1; i < 4; i++ ) m_3.setElement( 1, i, value_3++ ); // 1st row - + for( IndexType i = 0; i < 3; i++ ) // 2nd row m_3.setElement( 2, i, value_3++ ); - + for( IndexType i = 1; i < 4; i++ ) // 3rd row m_3.setElement( 3, i, value_3++ ); - + VectorType inVector_3; inVector_3.setSize( m_cols_3 ); - for( IndexType i = 0; i < inVector_3.getSize(); i++ ) + for( IndexType i = 0; i < inVector_3.getSize(); i++ ) inVector_3.setElement( i, 2 ); - VectorType outVector_3; + VectorType outVector_3; outVector_3.setSize( m_rows_3 ); for( IndexType j = 0; j < outVector_3.getSize(); j++ ) outVector_3.setElement( j, 0 ); - - + + m_3.vectorProduct( inVector_3, outVector_3 ); - - + + EXPECT_EQ( outVector_3.getElement( 0 ), 12 ); EXPECT_EQ( outVector_3.getElement( 1 ), 30 ); EXPECT_EQ( outVector_3.getElement( 2 ), 48 ); EXPECT_EQ( outVector_3.getElement( 3 ), 66 ); - - + + /* * Sets up the following 8x8 sparse matrix: * @@ -773,10 +1069,10 @@ void test_VectorProduct() * | 26 27 28 29 30 0 0 0 | * \ 31 32 33 34 35 0 0 0 / */ - + const IndexType m_rows_4 = 8; const IndexType m_cols_4 = 8; - + Matrix m_4; m_4.reset(); m_4.setDimensions( m_rows_4, m_cols_4 ); @@ -787,48 +1083,48 @@ void test_VectorProduct() rowLengths_4.setElement( 6, 5 ); rowLengths_4.setElement( 7, 5 ); m_4.setCompressedRowLengths( rowLengths_4 ); - + RealType value_4 = 1; for( IndexType i = 0; i < 3; i++ ) // 0th row m_4.setElement( 0, i, value_4++ ); - + m_4.setElement( 0, 5, value_4++ ); - + for( IndexType i = 1; i < 5; i++ ) // 1st row m_4.setElement( 1, i, value_4++ ); - + for( IndexType i = 0; i < 5; i++ ) // 2nd row m_4.setElement( 2, i, value_4++ ); - + for( IndexType i = 1; i < 5; i++ ) // 3rd row m_4.setElement( 3, i, value_4++ ); - + for( IndexType i = 2; i < 6; i++ ) // 4th row m_4.setElement( 4, i, value_4++ ); - + for( IndexType i = 3; i < 7; i++ ) // 5th row m_4.setElement( 5, i, value_4++ ); - + for( IndexType i = 0; i < 5; i++ ) // 6th row m_4.setElement( 6, i, value_4++ ); - + for( IndexType i = 0; i < 5; i++ ) // 7th row m_4.setElement( 7, i, value_4++ ); - + VectorType inVector_4; inVector_4.setSize( m_cols_4 ); - for( IndexType i = 0; i < inVector_4.getSize(); i++ ) + for( IndexType i = 0; i < inVector_4.getSize(); i++ ) inVector_4.setElement( i, 2 ); - VectorType outVector_4; + VectorType outVector_4; outVector_4.setSize( m_rows_4 ); for( IndexType j = 0; j < outVector_4.getSize(); j++ ) outVector_4.setElement( j, 0 ); - - + + m_4.vectorProduct( inVector_4, outVector_4 ); - - + + EXPECT_EQ( outVector_4.getElement( 0 ), 20 ); EXPECT_EQ( outVector_4.getElement( 1 ), 52 ); EXPECT_EQ( outVector_4.getElement( 2 ), 110 ); @@ -837,20 +1133,20 @@ void test_VectorProduct() EXPECT_EQ( outVector_4.getElement( 5 ), 188 ); EXPECT_EQ( outVector_4.getElement( 6 ), 280 ); EXPECT_EQ( outVector_4.getElement( 7 ), 330 ); - - -/* - * Sets up the following 8x8 sparse matrix: - * - * / 1 2 3 0 4 5 0 1 \ 6 - * | 0 6 0 7 0 0 0 1 | 3 - * | 0 8 9 0 10 0 0 1 | 4 - * | 0 11 12 13 14 0 0 1 | 5 - * | 0 15 0 0 0 0 0 1 | 2 - * | 0 16 17 18 19 20 21 1 | 7 - * | 22 23 24 25 26 27 28 1 | 8 - * \ 29 30 31 32 33 34 35 36 / 8 - */ + + + /* + * Sets up the following 8x8 sparse matrix: + * + * / 1 2 3 0 4 5 0 1 \ 6 + * | 0 6 0 7 0 0 0 1 | 3 + * | 0 8 9 0 10 0 0 1 | 4 + * | 0 11 12 13 14 0 0 1 | 5 + * | 0 15 0 0 0 0 0 1 | 2 + * | 0 16 17 18 19 20 21 1 | 7 + * | 22 23 24 25 26 27 28 1 | 8 + * \ 29 30 31 32 33 34 35 36 / 8 + */ const IndexType 
m_rows_5 = 8; const IndexType m_cols_5 = 8; @@ -901,20 +1197,18 @@ void test_VectorProduct() for( IndexType i = 0; i < 7; i++ ) // 1s at the end of rows m_5.setElement( i, 7, 1); - + VectorType inVector_5; inVector_5.setSize( m_cols_5 ); - for( IndexType i = 0; i < inVector_5.getSize(); i++ ) + for( IndexType i = 0; i < inVector_5.getSize(); i++ ) inVector_5.setElement( i, 2 ); - VectorType outVector_5; + VectorType outVector_5; outVector_5.setSize( m_rows_5 ); for( IndexType j = 0; j < outVector_5.getSize(); j++ ) outVector_5.setElement( j, 0 ); - - + m_5.vectorProduct( inVector_5, outVector_5 ); - EXPECT_EQ( outVector_5.getElement( 0 ), 32 ); EXPECT_EQ( outVector_5.getElement( 1 ), 28 ); @@ -926,13 +1220,118 @@ void test_VectorProduct() EXPECT_EQ( outVector_5.getElement( 7 ), 520 ); } +template< typename Matrix > +void test_RowsReduction() +{ + using RealType = typename Matrix::RealType; + using DeviceType = typename Matrix::DeviceType; + using IndexType = typename Matrix::IndexType; + + /* + * Sets up the following 8x8 sparse matrix: + * + * / 1 2 3 0 4 5 0 1 \ 6 + * | 0 6 0 7 0 0 0 1 | 3 + * | 0 8 9 0 10 0 0 1 | 4 + * | 0 11 12 13 14 0 0 1 | 5 + * | 0 15 0 0 0 0 0 1 | 2 + * | 0 16 17 18 19 20 21 1 | 7 + * | 22 23 24 25 26 27 28 1 | 8 + * \ 29 30 31 32 33 34 35 36 / 8 + */ + + const IndexType rows = 8; + const IndexType cols = 8; + + Matrix m; + m.setDimensions( rows, cols ); + typename Matrix::RowsCapacitiesType rowsCapacities( rows ); + //rowLengths.setSize( rows ); + rowsCapacities.setElement(0, 6); + rowsCapacities.setElement(1, 3); + rowsCapacities.setElement(2, 4); + rowsCapacities.setElement(3, 5); + rowsCapacities.setElement(4, 2); + rowsCapacities.setElement(5, 7); + rowsCapacities.setElement(6, 8); + rowsCapacities.setElement(7, 8); + m.setCompressedRowLengths( rowsCapacities ); + + RealType value = 1; + for( IndexType i = 0; i < 3; i++ ) // 0th row + m.setElement( 0, i, value++ ); + + m.setElement( 0, 4, value++ ); // 0th row + m.setElement( 0, 5, value++ ); + + m.setElement( 1, 1, value++ ); // 1st row + m.setElement( 1, 3, value++ ); + + for( IndexType i = 1; i < 3; i++ ) // 2nd row + m.setElement( 2, i, value++ ); + + m.setElement( 2, 4, value++ ); // 2nd row + + for( IndexType i = 1; i < 5; i++ ) // 3rd row + m.setElement( 3, i, value++ ); + + m.setElement( 4, 1, value++ ); // 4th row + + for( IndexType i = 1; i < 7; i++ ) // 5th row + m.setElement( 5, i, value++ ); + + for( IndexType i = 0; i < 7; i++ ) // 6th row + m.setElement( 6, i, value++ ); + + for( IndexType i = 0; i < 8; i++ ) // 7th row + m.setElement( 7, i, value++ ); + + for( IndexType i = 0; i < 7; i++ ) // 1s at the end of rows + m.setElement( i, 7, 1); + + //// + // Compute number of non-zero elements in rows. 
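   // The reduction below uses the fetch / reduce / keep pattern: fetch is evaluated for the stored
   // (possibly padded) elements of a row and maps each one to 1 if its value is nonzero, reduce
   // sums these flags, and keep writes the per-row result into rowLengths_view; the trailing 0
   // passed to allRowsReduction() is the identity element of the reduction. Because every
   // allocated slot of this matrix was filled with a nonzero value, the computed row lengths
   // should equal rowsCapacities, which the EXPECT_EQ below verifies.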
+ typename Matrix::RowsCapacitiesType rowLengths( rows ); + auto rowLengths_view = rowLengths.getView(); + auto fetch = [] __cuda_callable__ ( IndexType row, IndexType column, const RealType& value ) -> IndexType { + return ( value != 0.0 ); + }; + auto reduce = [] __cuda_callable__ ( IndexType& aux, const IndexType a ) { + aux += a; + }; + auto keep = [=] __cuda_callable__ ( const IndexType rowIdx, const IndexType value ) mutable { + rowLengths_view[ rowIdx ] = value; + }; + m.allRowsReduction( fetch, reduce, keep, 0 ); + EXPECT_EQ( rowsCapacities, rowLengths ); + m.getCompressedRowLengths( rowLengths ); + EXPECT_EQ( rowsCapacities, rowLengths ); + + //// + // Compute max norm + TNL::Containers::Vector< RealType, DeviceType, IndexType > rowSums( rows ); + auto rowSums_view = rowSums.getView(); + auto max_fetch = [] __cuda_callable__ ( IndexType row, IndexType column, const RealType& value ) -> IndexType { + return abs( value ); + }; + auto max_reduce = [] __cuda_callable__ ( IndexType& aux, const IndexType a ) { + aux += a; + }; + auto max_keep = [=] __cuda_callable__ ( const IndexType rowIdx, const IndexType value ) mutable { + rowSums_view[ rowIdx ] = value; + }; + m.allRowsReduction( max_fetch, max_reduce, max_keep, 0 ); + const RealType maxNorm = TNL::max( rowSums ); + EXPECT_EQ( maxNorm, 260 ) ; // 29+30+31+32+33+34+35+36 +} + template< typename Matrix > void test_PerformSORIteration() { using RealType = typename Matrix::RealType; using DeviceType = typename Matrix::DeviceType; using IndexType = typename Matrix::IndexType; - + /* * Sets up the following 4x4 sparse matrix: * @@ -941,10 +1340,10 @@ void test_PerformSORIteration() * | 0 1 4 1 | * \ 0 0 1 4 / */ - + const IndexType m_rows = 4; const IndexType m_cols = 4; - + Matrix m; m.reset(); m.setDimensions( m_rows, m_cols ); @@ -952,54 +1351,54 @@ void test_PerformSORIteration() rowLengths.setSize( m_rows ); rowLengths.setValue( 3 ); m.setCompressedRowLengths( rowLengths ); - + m.setElement( 0, 0, 4.0 ); // 0th row m.setElement( 0, 1, 1.0); - + m.setElement( 1, 0, 1.0 ); // 1st row m.setElement( 1, 1, 4.0 ); m.setElement( 1, 2, 1.0 ); - + m.setElement( 2, 1, 1.0 ); // 2nd row m.setElement( 2, 2, 4.0 ); m.setElement( 2, 3, 1.0 ); - + m.setElement( 3, 2, 1.0 ); // 3rd row m.setElement( 3, 3, 4.0 ); - + RealType bVector [ 4 ] = { 1, 1, 1, 1 }; RealType xVector [ 4 ] = { 1, 1, 1, 1 }; - + IndexType row = 0; RealType omega = 1; - - + + m.performSORIteration( bVector, row++, xVector, omega); - + EXPECT_EQ( xVector[ 0 ], 0.0 ); EXPECT_EQ( xVector[ 1 ], 1.0 ); EXPECT_EQ( xVector[ 2 ], 1.0 ); EXPECT_EQ( xVector[ 3 ], 1.0 ); - - + + m.performSORIteration( bVector, row++, xVector, omega); - + EXPECT_EQ( xVector[ 0 ], 0.0 ); EXPECT_EQ( xVector[ 1 ], 0.0 ); EXPECT_EQ( xVector[ 2 ], 1.0 ); EXPECT_EQ( xVector[ 3 ], 1.0 ); - - + + m.performSORIteration( bVector, row++, xVector, omega); - + EXPECT_EQ( xVector[ 0 ], 0.0 ); EXPECT_EQ( xVector[ 1 ], 0.0 ); EXPECT_EQ( xVector[ 2 ], 0.0 ); EXPECT_EQ( xVector[ 3 ], 1.0 ); - - + + m.performSORIteration( bVector, row++, xVector, omega); - + EXPECT_EQ( xVector[ 0 ], 0.0 ); EXPECT_EQ( xVector[ 1 ], 0.0 ); EXPECT_EQ( xVector[ 2 ], 0.0 ); @@ -1013,7 +1412,7 @@ void test_OperatorEquals() using RealType = typename Matrix::RealType; using DeviceType = typename Matrix::DeviceType; using IndexType = typename Matrix::IndexType; - + if( std::is_same< DeviceType, TNL::Devices::Cuda >::value ) return; else @@ -1059,33 +1458,33 @@ void test_OperatorEquals() m_host.setElement( 0, 4, value++ ); // 0th row 
m_host.setElement( 0, 5, value++ ); - + m_host.setElement( 1, 1, value++ ); // 1st row m_host.setElement( 1, 3, value++ ); for( IndexType i = 1; i < 3; i++ ) // 2nd row m_host.setElement( 2, i, value++ ); - + m_host.setElement( 2, 4, value++ ); // 2nd row - + for( IndexType i = 1; i < 5; i++ ) // 3rd row m_host.setElement( 3, i, value++ ); m_host.setElement( 4, 1, value++ ); // 4th row - + for( IndexType i = 1; i < 7; i++ ) // 5th row m_host.setElement( 5, i, value++ ); - + for( IndexType i = 0; i < 7; i++ ) // 6th row m_host.setElement( 6, i, value++ ); - + for( IndexType i = 0; i < 8; i++ ) // 7th row m_host.setElement( 7, i, value++ ); - + for( IndexType i = 0; i < 7; i++ ) // 1s at the end or rows: 5, 6 m_host.setElement( i, 7, 1); - + EXPECT_EQ( m_host.getElement( 0, 0 ), 1 ); EXPECT_EQ( m_host.getElement( 0, 1 ), 2 ); EXPECT_EQ( m_host.getElement( 0, 2 ), 3 ); @@ -1094,7 +1493,7 @@ void test_OperatorEquals() EXPECT_EQ( m_host.getElement( 0, 5 ), 5 ); EXPECT_EQ( m_host.getElement( 0, 6 ), 0 ); EXPECT_EQ( m_host.getElement( 0, 7 ), 1 ); - + EXPECT_EQ( m_host.getElement( 1, 0 ), 0 ); EXPECT_EQ( m_host.getElement( 1, 1 ), 6 ); EXPECT_EQ( m_host.getElement( 1, 2 ), 0 ); @@ -1103,7 +1502,7 @@ void test_OperatorEquals() EXPECT_EQ( m_host.getElement( 1, 5 ), 0 ); EXPECT_EQ( m_host.getElement( 1, 6 ), 0 ); EXPECT_EQ( m_host.getElement( 1, 7 ), 1 ); - + EXPECT_EQ( m_host.getElement( 2, 0 ), 0 ); EXPECT_EQ( m_host.getElement( 2, 1 ), 8 ); EXPECT_EQ( m_host.getElement( 2, 2 ), 9 ); @@ -1112,7 +1511,7 @@ void test_OperatorEquals() EXPECT_EQ( m_host.getElement( 2, 5 ), 0 ); EXPECT_EQ( m_host.getElement( 2, 6 ), 0 ); EXPECT_EQ( m_host.getElement( 2, 7 ), 1 ); - + EXPECT_EQ( m_host.getElement( 3, 0 ), 0 ); EXPECT_EQ( m_host.getElement( 3, 1 ), 11 ); EXPECT_EQ( m_host.getElement( 3, 2 ), 12 ); @@ -1121,7 +1520,7 @@ void test_OperatorEquals() EXPECT_EQ( m_host.getElement( 3, 5 ), 0 ); EXPECT_EQ( m_host.getElement( 3, 6 ), 0 ); EXPECT_EQ( m_host.getElement( 3, 7 ), 1 ); - + EXPECT_EQ( m_host.getElement( 4, 0 ), 0 ); EXPECT_EQ( m_host.getElement( 4, 1 ), 15 ); EXPECT_EQ( m_host.getElement( 4, 2 ), 0 ); @@ -1130,7 +1529,7 @@ void test_OperatorEquals() EXPECT_EQ( m_host.getElement( 4, 5 ), 0 ); EXPECT_EQ( m_host.getElement( 4, 6 ), 0 ); EXPECT_EQ( m_host.getElement( 4, 7 ), 1 ); - + EXPECT_EQ( m_host.getElement( 5, 0 ), 0 ); EXPECT_EQ( m_host.getElement( 5, 1 ), 16 ); EXPECT_EQ( m_host.getElement( 5, 2 ), 17 ); @@ -1139,7 +1538,7 @@ void test_OperatorEquals() EXPECT_EQ( m_host.getElement( 5, 5 ), 20 ); EXPECT_EQ( m_host.getElement( 5, 6 ), 21 ); EXPECT_EQ( m_host.getElement( 5, 7 ), 1 ); - + EXPECT_EQ( m_host.getElement( 6, 0 ), 22 ); EXPECT_EQ( m_host.getElement( 6, 1 ), 23 ); EXPECT_EQ( m_host.getElement( 6, 2 ), 24 ); @@ -1148,7 +1547,7 @@ void test_OperatorEquals() EXPECT_EQ( m_host.getElement( 6, 5 ), 27 ); EXPECT_EQ( m_host.getElement( 6, 6 ), 28 ); EXPECT_EQ( m_host.getElement( 6, 7 ), 1 ); - + EXPECT_EQ( m_host.getElement( 7, 0 ), 29 ); EXPECT_EQ( m_host.getElement( 7, 1 ), 30 ); EXPECT_EQ( m_host.getElement( 7, 2 ), 31 ); @@ -1178,7 +1577,7 @@ void test_OperatorEquals() EXPECT_EQ( m_host.getElement( 0, 5 ), 5 ); EXPECT_EQ( m_host.getElement( 0, 6 ), 0 ); EXPECT_EQ( m_host.getElement( 0, 7 ), 1 ); - + EXPECT_EQ( m_host.getElement( 1, 0 ), 0 ); EXPECT_EQ( m_host.getElement( 1, 1 ), 6 ); EXPECT_EQ( m_host.getElement( 1, 2 ), 0 ); @@ -1187,7 +1586,7 @@ void test_OperatorEquals() EXPECT_EQ( m_host.getElement( 1, 5 ), 0 ); EXPECT_EQ( m_host.getElement( 1, 6 ), 0 ); EXPECT_EQ( m_host.getElement( 1, 7 ), 
1 ); - + EXPECT_EQ( m_host.getElement( 2, 0 ), 0 ); EXPECT_EQ( m_host.getElement( 2, 1 ), 8 ); EXPECT_EQ( m_host.getElement( 2, 2 ), 9 ); @@ -1196,7 +1595,7 @@ void test_OperatorEquals() EXPECT_EQ( m_host.getElement( 2, 5 ), 0 ); EXPECT_EQ( m_host.getElement( 2, 6 ), 0 ); EXPECT_EQ( m_host.getElement( 2, 7 ), 1 ); - + EXPECT_EQ( m_host.getElement( 3, 0 ), 0 ); EXPECT_EQ( m_host.getElement( 3, 1 ), 11 ); EXPECT_EQ( m_host.getElement( 3, 2 ), 12 ); @@ -1205,7 +1604,7 @@ void test_OperatorEquals() EXPECT_EQ( m_host.getElement( 3, 5 ), 0 ); EXPECT_EQ( m_host.getElement( 3, 6 ), 0 ); EXPECT_EQ( m_host.getElement( 3, 7 ), 1 ); - + EXPECT_EQ( m_host.getElement( 4, 0 ), 0 ); EXPECT_EQ( m_host.getElement( 4, 1 ), 15 ); EXPECT_EQ( m_host.getElement( 4, 2 ), 0 ); @@ -1214,7 +1613,7 @@ void test_OperatorEquals() EXPECT_EQ( m_host.getElement( 4, 5 ), 0 ); EXPECT_EQ( m_host.getElement( 4, 6 ), 0 ); EXPECT_EQ( m_host.getElement( 4, 7 ), 1 ); - + EXPECT_EQ( m_host.getElement( 5, 0 ), 0 ); EXPECT_EQ( m_host.getElement( 5, 1 ), 16 ); EXPECT_EQ( m_host.getElement( 5, 2 ), 17 ); @@ -1223,7 +1622,7 @@ void test_OperatorEquals() EXPECT_EQ( m_host.getElement( 5, 5 ), 20 ); EXPECT_EQ( m_host.getElement( 5, 6 ), 21 ); EXPECT_EQ( m_host.getElement( 5, 7 ), 1 ); - + EXPECT_EQ( m_host.getElement( 6, 0 ), 22 ); EXPECT_EQ( m_host.getElement( 6, 1 ), 23 ); EXPECT_EQ( m_host.getElement( 6, 2 ), 24 ); @@ -1232,7 +1631,7 @@ void test_OperatorEquals() EXPECT_EQ( m_host.getElement( 6, 5 ), 27 ); EXPECT_EQ( m_host.getElement( 6, 6 ), 28 ); EXPECT_EQ( m_host.getElement( 6, 7 ), 1 ); - + EXPECT_EQ( m_host.getElement( 7, 0 ), 29 ); EXPECT_EQ( m_host.getElement( 7, 1 ), 30 ); EXPECT_EQ( m_host.getElement( 7, 2 ), 31 ); @@ -1241,22 +1640,22 @@ void test_OperatorEquals() EXPECT_EQ( m_host.getElement( 7, 5 ), 34 ); EXPECT_EQ( m_host.getElement( 7, 6 ), 35 ); EXPECT_EQ( m_host.getElement( 7, 7 ), 36 ); - + // Try vectorProduct with copied cuda matrix to see if it works correctly. 
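   // The expected products match test_VectorProduct above, since the copied matrix and
   // the input vector (all elements equal to 2) are identical to that test, e.g.
   // row 0: 2 * ( 1 + 2 + 3 + 4 + 5 + 1 ) = 32 and row 7: 2 * ( 29 + 30 + ... + 36 ) = 520.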
using VectorType = TNL::Containers::Vector< RealType, TNL::Devices::Cuda, IndexType >; - + VectorType inVector; inVector.setSize( m_cols ); - for( IndexType i = 0; i < inVector.getSize(); i++ ) + for( IndexType i = 0; i < inVector.getSize(); i++ ) inVector.setElement( i, 2 ); - VectorType outVector; + VectorType outVector; outVector.setSize( m_rows ); for( IndexType j = 0; j < outVector.getSize(); j++ ) outVector.setElement( j, 0 ); - + m_cuda.vectorProduct( inVector, outVector ); - + EXPECT_EQ( outVector.getElement( 0 ), 32 ); EXPECT_EQ( outVector.getElement( 1 ), 28 ); EXPECT_EQ( outVector.getElement( 2 ), 56 ); @@ -1274,7 +1673,7 @@ void test_SaveAndLoad( const char* filename ) using RealType = typename Matrix::RealType; using DeviceType = typename Matrix::DeviceType; using IndexType = typename Matrix::IndexType; - + /* * Sets up the following 4x4 sparse matrix: * @@ -1283,10 +1682,10 @@ void test_SaveAndLoad( const char* filename ) * | 6 7 8 0 | * \ 0 9 10 11 / */ - + const IndexType m_rows = 4; const IndexType m_cols = 4; - + Matrix savedMatrix; savedMatrix.reset(); savedMatrix.setDimensions( m_rows, m_cols ); @@ -1294,22 +1693,22 @@ void test_SaveAndLoad( const char* filename ) rowLengths.setSize( m_rows ); rowLengths.setValue( 3 ); savedMatrix.setCompressedRowLengths( rowLengths ); - + RealType value = 1; for( IndexType i = 0; i < m_cols - 1; i++ ) // 0th row savedMatrix.setElement( 0, i, value++ ); - + savedMatrix.setElement( 1, 1, value++ ); savedMatrix.setElement( 1, 3, value++ ); // 1st row - + for( IndexType i = 0; i < m_cols - 1; i++ ) // 2nd row savedMatrix.setElement( 2, i, value++ ); - + for( IndexType i = 1; i < m_cols; i++ ) // 3rd row savedMatrix.setElement( 3, i, value++ ); - + ASSERT_NO_THROW( savedMatrix.save( filename ) ); - + Matrix loadedMatrix; loadedMatrix.reset(); loadedMatrix.setDimensions( m_rows, m_cols ); @@ -1317,51 +1716,51 @@ void test_SaveAndLoad( const char* filename ) rowLengths2.setSize( m_rows ); rowLengths2.setValue( 3 ); loadedMatrix.setCompressedRowLengths( rowLengths2 ); - - + + ASSERT_NO_THROW( loadedMatrix.load( filename ) ); - - + + EXPECT_EQ( savedMatrix.getElement( 0, 0 ), loadedMatrix.getElement( 0, 0 ) ); EXPECT_EQ( savedMatrix.getElement( 0, 1 ), loadedMatrix.getElement( 0, 1 ) ); EXPECT_EQ( savedMatrix.getElement( 0, 2 ), loadedMatrix.getElement( 0, 2 ) ); EXPECT_EQ( savedMatrix.getElement( 0, 3 ), loadedMatrix.getElement( 0, 3 ) ); - + EXPECT_EQ( savedMatrix.getElement( 1, 0 ), loadedMatrix.getElement( 1, 0 ) ); EXPECT_EQ( savedMatrix.getElement( 1, 1 ), loadedMatrix.getElement( 1, 1 ) ); EXPECT_EQ( savedMatrix.getElement( 1, 2 ), loadedMatrix.getElement( 1, 2 ) ); EXPECT_EQ( savedMatrix.getElement( 1, 3 ), loadedMatrix.getElement( 1, 3 ) ); - + EXPECT_EQ( savedMatrix.getElement( 2, 0 ), loadedMatrix.getElement( 2, 0 ) ); EXPECT_EQ( savedMatrix.getElement( 2, 1 ), loadedMatrix.getElement( 2, 1 ) ); EXPECT_EQ( savedMatrix.getElement( 2, 2 ), loadedMatrix.getElement( 2, 2 ) ); EXPECT_EQ( savedMatrix.getElement( 2, 3 ), loadedMatrix.getElement( 2, 3 ) ); - + EXPECT_EQ( savedMatrix.getElement( 3, 0 ), loadedMatrix.getElement( 3, 0 ) ); EXPECT_EQ( savedMatrix.getElement( 3, 1 ), loadedMatrix.getElement( 3, 1 ) ); EXPECT_EQ( savedMatrix.getElement( 3, 2 ), loadedMatrix.getElement( 3, 2 ) ); EXPECT_EQ( savedMatrix.getElement( 3, 3 ), loadedMatrix.getElement( 3, 3 ) ); - + EXPECT_EQ( savedMatrix.getElement( 0, 0 ), 1 ); EXPECT_EQ( savedMatrix.getElement( 0, 1 ), 2 ); EXPECT_EQ( savedMatrix.getElement( 0, 2 ), 3 ); EXPECT_EQ( 
savedMatrix.getElement( 0, 3 ), 0 ); - + EXPECT_EQ( savedMatrix.getElement( 1, 0 ), 0 ); EXPECT_EQ( savedMatrix.getElement( 1, 1 ), 4 ); EXPECT_EQ( savedMatrix.getElement( 1, 2 ), 0 ); EXPECT_EQ( savedMatrix.getElement( 1, 3 ), 5 ); - + EXPECT_EQ( savedMatrix.getElement( 2, 0 ), 6 ); EXPECT_EQ( savedMatrix.getElement( 2, 1 ), 7 ); EXPECT_EQ( savedMatrix.getElement( 2, 2 ), 8 ); EXPECT_EQ( savedMatrix.getElement( 2, 3 ), 0 ); - + EXPECT_EQ( savedMatrix.getElement( 3, 0 ), 0 ); EXPECT_EQ( savedMatrix.getElement( 3, 1 ), 9 ); EXPECT_EQ( savedMatrix.getElement( 3, 2 ), 10 ); EXPECT_EQ( savedMatrix.getElement( 3, 3 ), 11 ); - + EXPECT_EQ( std::remove( filename ), 0 ); } @@ -1371,7 +1770,7 @@ void test_Print() using RealType = typename Matrix::RealType; using DeviceType = typename Matrix::DeviceType; using IndexType = typename Matrix::IndexType; - + /* * Sets up the following 5x4 sparse matrix: * @@ -1381,10 +1780,10 @@ void test_Print() * | 0 8 9 10 | * \ 0 0 11 12 / */ - + const IndexType m_rows = 5; const IndexType m_cols = 4; - + Matrix m; m.reset(); m.setDimensions( m_rows, m_cols ); @@ -1392,40 +1791,40 @@ void test_Print() rowLengths.setSize( m_rows ); rowLengths.setValue( 3 ); m.setCompressedRowLengths( rowLengths ); - + RealType value = 1; for( IndexType i = 0; i < m_cols - 1; i++ ) // 0th row m.setElement( 0, i, value++ ); - + m.setElement( 1, 3, value++ ); // 1st row - + for( IndexType i = 0; i < m_cols - 1; i++ ) // 2nd row m.setElement( 2, i, value++ ); - + for( IndexType i = 1; i < m_cols; i++ ) // 3rd row m.setElement( 3, i, value++ ); - + for( IndexType i = 2; i < m_cols; i++ ) // 4th row m.setElement( 4, i, value++ ); - + #include <sstream> std::stringstream printed; std::stringstream couted; - + //change the underlying buffer and save the old buffer - auto old_buf = std::cout.rdbuf(printed.rdbuf()); + auto old_buf = std::cout.rdbuf(printed.rdbuf()); m.print( std::cout ); //all the std::cout goes to ss std::cout.rdbuf(old_buf); //reset - + couted << "Row: 0 -> Col:0->1 Col:1->2 Col:2->3\t\n" "Row: 1 -> Col:3->4\t\n" "Row: 2 -> Col:0->5 Col:1->6 Col:2->7\t\n" "Row: 3 -> Col:1->8 Col:2->9 Col:3->10\t\n" "Row: 4 -> Col:2->11 Col:3->12\t\n"; - - + + EXPECT_EQ( printed.str(), couted.str() ); } diff --git a/src/UnitTests/Matrices/SparseMatrixTest_AdEllpack.cpp b/src/UnitTests/Matrices/Legacy/SparseMatrixTest_AdEllpack.cpp similarity index 100% rename from src/UnitTests/Matrices/SparseMatrixTest_AdEllpack.cpp rename to src/UnitTests/Matrices/Legacy/SparseMatrixTest_AdEllpack.cpp diff --git a/src/UnitTests/Matrices/SparseMatrixTest_AdEllpack.cu b/src/UnitTests/Matrices/Legacy/SparseMatrixTest_AdEllpack.cu similarity index 100% rename from src/UnitTests/Matrices/SparseMatrixTest_AdEllpack.cu rename to src/UnitTests/Matrices/Legacy/SparseMatrixTest_AdEllpack.cu diff --git a/src/UnitTests/Matrices/SparseMatrixTest_AdEllpack.h b/src/UnitTests/Matrices/Legacy/SparseMatrixTest_AdEllpack.h similarity index 85% rename from src/UnitTests/Matrices/SparseMatrixTest_AdEllpack.h rename to src/UnitTests/Matrices/Legacy/SparseMatrixTest_AdEllpack.h index 7effb52cd864fc61c6cc27345694c00c487c0328..8e07205e5e9c012a05e75923c8065c4aabf1717e 100644 --- a/src/UnitTests/Matrices/SparseMatrixTest_AdEllpack.h +++ b/src/UnitTests/Matrices/Legacy/SparseMatrixTest_AdEllpack.h @@ -8,7 +8,7 @@ /* See Copyright Notice in tnl/Copyright */ -#include <TNL/Matrices/AdEllpack.h> +#include <TNL/Matrices/Legacy/AdEllpack.h> #include "SparseMatrixTest.hpp" #include <iostream> @@ -27,10 +27,6 @@ protected: // types for which 
MatrixTest is instantiated using AdEllpackMatrixTypes = ::testing::Types < - TNL::Matrices::AdEllpack< int, TNL::Devices::Host, short >, - TNL::Matrices::AdEllpack< long, TNL::Devices::Host, short >, - TNL::Matrices::AdEllpack< float, TNL::Devices::Host, short >, - TNL::Matrices::AdEllpack< double, TNL::Devices::Host, short >, TNL::Matrices::AdEllpack< int, TNL::Devices::Host, int >, TNL::Matrices::AdEllpack< long, TNL::Devices::Host, int >, TNL::Matrices::AdEllpack< float, TNL::Devices::Host, int >, @@ -40,11 +36,7 @@ using AdEllpackMatrixTypes = ::testing::Types TNL::Matrices::AdEllpack< float, TNL::Devices::Host, long >, TNL::Matrices::AdEllpack< double, TNL::Devices::Host, long > #ifdef HAVE_CUDA - ,TNL::Matrices::AdEllpack< int, TNL::Devices::Cuda, short >, - TNL::Matrices::AdEllpack< long, TNL::Devices::Cuda, short >, - TNL::Matrices::AdEllpack< float, TNL::Devices::Cuda, short >, - TNL::Matrices::AdEllpack< double, TNL::Devices::Cuda, short >, - TNL::Matrices::AdEllpack< int, TNL::Devices::Cuda, int >, + ,TNL::Matrices::AdEllpack< int, TNL::Devices::Cuda, int >, TNL::Matrices::AdEllpack< long, TNL::Devices::Cuda, int >, TNL::Matrices::AdEllpack< float, TNL::Devices::Cuda, int >, TNL::Matrices::AdEllpack< double, TNL::Devices::Cuda, int >, @@ -133,4 +125,4 @@ TYPED_TEST( AdEllpackMatrixTest, printTest ) #endif -#include "../main.h" +#include "../../main.h" diff --git a/src/UnitTests/Matrices/SparseMatrixTest_BiEllpack.cpp b/src/UnitTests/Matrices/Legacy/SparseMatrixTest_BiEllpack.cpp similarity index 100% rename from src/UnitTests/Matrices/SparseMatrixTest_BiEllpack.cpp rename to src/UnitTests/Matrices/Legacy/SparseMatrixTest_BiEllpack.cpp diff --git a/src/UnitTests/Matrices/SparseMatrixTest_BiEllpack.cu b/src/UnitTests/Matrices/Legacy/SparseMatrixTest_BiEllpack.cu similarity index 100% rename from src/UnitTests/Matrices/SparseMatrixTest_BiEllpack.cu rename to src/UnitTests/Matrices/Legacy/SparseMatrixTest_BiEllpack.cu diff --git a/src/UnitTests/Matrices/SparseMatrixTest_BiEllpack.h b/src/UnitTests/Matrices/Legacy/SparseMatrixTest_BiEllpack.h similarity index 87% rename from src/UnitTests/Matrices/SparseMatrixTest_BiEllpack.h rename to src/UnitTests/Matrices/Legacy/SparseMatrixTest_BiEllpack.h index 33e530be57e6675bac01f735547a79b4731b57a9..c386481071f7ce2e4a209d3457f47a060e8dae98 100644 --- a/src/UnitTests/Matrices/SparseMatrixTest_BiEllpack.h +++ b/src/UnitTests/Matrices/Legacy/SparseMatrixTest_BiEllpack.h @@ -8,7 +8,7 @@ /* See Copyright Notice in tnl/Copyright */ -#include <TNL/Matrices/BiEllpack.h> +#include <TNL/Matrices/Legacy/BiEllpack.h> #include "SparseMatrixTest.hpp" #include <iostream> @@ -27,10 +27,6 @@ protected: // types for which MatrixTest is instantiated using BiEllpackMatrixTypes = ::testing::Types < - TNL::Matrices::BiEllpack< int, TNL::Devices::Host, short >, - TNL::Matrices::BiEllpack< long, TNL::Devices::Host, short >, - TNL::Matrices::BiEllpack< float, TNL::Devices::Host, short >, - TNL::Matrices::BiEllpack< double, TNL::Devices::Host, short >, TNL::Matrices::BiEllpack< int, TNL::Devices::Host, int >, TNL::Matrices::BiEllpack< long, TNL::Devices::Host, int >, TNL::Matrices::BiEllpack< float, TNL::Devices::Host, int >, @@ -40,11 +36,7 @@ using BiEllpackMatrixTypes = ::testing::Types TNL::Matrices::BiEllpack< float, TNL::Devices::Host, long >, TNL::Matrices::BiEllpack< double, TNL::Devices::Host, long > #ifdef HAVE_CUDA - ,TNL::Matrices::BiEllpack< int, TNL::Devices::Cuda, short >, - TNL::Matrices::BiEllpack< long, TNL::Devices::Cuda, short >, - 
TNL::Matrices::BiEllpack< float, TNL::Devices::Cuda, short >, - TNL::Matrices::BiEllpack< double, TNL::Devices::Cuda, short >, - TNL::Matrices::BiEllpack< int, TNL::Devices::Cuda, int >, + ,TNL::Matrices::BiEllpack< int, TNL::Devices::Cuda, int >, TNL::Matrices::BiEllpack< long, TNL::Devices::Cuda, int >, TNL::Matrices::BiEllpack< float, TNL::Devices::Cuda, int >, TNL::Matrices::BiEllpack< double, TNL::Devices::Cuda, int >, @@ -142,4 +134,4 @@ TYPED_TEST( BiEllpackMatrixTest, printTest ) } #endif // HAVE_GTEST -#include "../main.h" +#include "../../main.h" diff --git a/src/UnitTests/Matrices/Legacy/SparseMatrixTest_CSR.cpp b/src/UnitTests/Matrices/Legacy/SparseMatrixTest_CSR.cpp new file mode 100644 index 0000000000000000000000000000000000000000..258ad2c53831010111eeec9dc240368ae5dffb35 --- /dev/null +++ b/src/UnitTests/Matrices/Legacy/SparseMatrixTest_CSR.cpp @@ -0,0 +1 @@ +#include "SparseMatrixTest_CSR.h" diff --git a/src/UnitTests/Matrices/Legacy/SparseMatrixTest_CSR.cu b/src/UnitTests/Matrices/Legacy/SparseMatrixTest_CSR.cu new file mode 100644 index 0000000000000000000000000000000000000000..258ad2c53831010111eeec9dc240368ae5dffb35 --- /dev/null +++ b/src/UnitTests/Matrices/Legacy/SparseMatrixTest_CSR.cu @@ -0,0 +1 @@ +#include "SparseMatrixTest_CSR.h" diff --git a/src/UnitTests/Matrices/Legacy/SparseMatrixTest_CSR.h b/src/UnitTests/Matrices/Legacy/SparseMatrixTest_CSR.h new file mode 100644 index 0000000000000000000000000000000000000000..13c1ed6e00c872623c66fc88d8e57cb94ffb7557 --- /dev/null +++ b/src/UnitTests/Matrices/Legacy/SparseMatrixTest_CSR.h @@ -0,0 +1,131 @@ +/*************************************************************************** + SparseMatrixTest_CSR.h - description + ------------------- + begin : Nov 2, 2018 + copyright : (C) 2018 by Tomas Oberhuber et al. 
+ email : tomas.oberhuber@fjfi.cvut.cz + ***************************************************************************/ + +/* See Copyright Notice in tnl/Copyright */ + +#include <TNL/Matrices/Legacy/CSR.h> + +#include "SparseMatrixTest.hpp" +#include <iostream> + +#ifdef HAVE_GTEST +#include <gtest/gtest.h> + +// test fixture for typed tests +template< typename Matrix > +class CSRMatrixTest : public ::testing::Test +{ +protected: + using CSRMatrixType = Matrix; +}; + +// types for which MatrixTest is instantiated +using CSRMatrixTypes = ::testing::Types +< + TNL::Matrices::CSR< int, TNL::Devices::Host, int >, + TNL::Matrices::CSR< long, TNL::Devices::Host, int >, + TNL::Matrices::CSR< float, TNL::Devices::Host, int >, + TNL::Matrices::CSR< double, TNL::Devices::Host, int >, + TNL::Matrices::CSR< int, TNL::Devices::Host, long >, + TNL::Matrices::CSR< long, TNL::Devices::Host, long >, + TNL::Matrices::CSR< float, TNL::Devices::Host, long >, + TNL::Matrices::CSR< double, TNL::Devices::Host, long > +#ifdef HAVE_CUDA + ,TNL::Matrices::CSR< int, TNL::Devices::Cuda, int >, + TNL::Matrices::CSR< long, TNL::Devices::Cuda, int >, + TNL::Matrices::CSR< float, TNL::Devices::Cuda, int >, + TNL::Matrices::CSR< double, TNL::Devices::Cuda, int >, + TNL::Matrices::CSR< int, TNL::Devices::Cuda, long >, + TNL::Matrices::CSR< long, TNL::Devices::Cuda, long >, + TNL::Matrices::CSR< float, TNL::Devices::Cuda, long >, + TNL::Matrices::CSR< double, TNL::Devices::Cuda, long > +#endif +>; + +TYPED_TEST_SUITE( CSRMatrixTest, CSRMatrixTypes); + +TYPED_TEST( CSRMatrixTest, setDimensionsTest ) +{ + using CSRMatrixType = typename TestFixture::CSRMatrixType; + + test_SetDimensions< CSRMatrixType >(); +} + +//TYPED_TEST( CSRMatrixTest, setCompressedRowLengthsTest ) +//{ +//// using CSRMatrixType = typename TestFixture::CSRMatrixType; +// +//// test_SetCompressedRowLengths< CSRMatrixType >(); +// +// bool testRan = false; +// EXPECT_TRUE( testRan ); +// std::cout << "\nTEST DID NOT RUN. NOT WORKING.\n\n"; +// std::cout << " This test is dependent on the input format. 
\n"; +// std::cout << " Almost every format allocates elements per row differently.\n\n"; +// std::cout << "\n TODO: Finish implementation of getNonZeroRowLength (Only non-zero elements, not the number of allocated elements.)\n\n"; +//} + +TYPED_TEST( CSRMatrixTest, setLikeTest ) +{ + using CSRMatrixType = typename TestFixture::CSRMatrixType; + + test_SetLike< CSRMatrixType, CSRMatrixType >(); +} + +TYPED_TEST( CSRMatrixTest, resetTest ) +{ + using CSRMatrixType = typename TestFixture::CSRMatrixType; + + test_Reset< CSRMatrixType >(); +} + +TYPED_TEST( CSRMatrixTest, setElementTest ) +{ + using CSRMatrixType = typename TestFixture::CSRMatrixType; + + test_SetElement< CSRMatrixType >(); +} + +TYPED_TEST( CSRMatrixTest, addElementTest ) +{ + using CSRMatrixType = typename TestFixture::CSRMatrixType; + + test_AddElement< CSRMatrixType >(); +} + +TYPED_TEST( CSRMatrixTest, setRowTest ) +{ + using CSRMatrixType = typename TestFixture::CSRMatrixType; + + test_SetRow< CSRMatrixType >(); +} + +TYPED_TEST( CSRMatrixTest, vectorProductTest ) +{ + using CSRMatrixType = typename TestFixture::CSRMatrixType; + + test_VectorProduct< CSRMatrixType >(); +} + +TYPED_TEST( CSRMatrixTest, saveAndLoadTest ) +{ + using CSRMatrixType = typename TestFixture::CSRMatrixType; + + test_SaveAndLoad< CSRMatrixType >( "test_SparseMatrixTest_CSR" ); +} + +TYPED_TEST( CSRMatrixTest, printTest ) +{ + using CSRMatrixType = typename TestFixture::CSRMatrixType; + + test_Print< CSRMatrixType >(); +} + +#endif + +#include "../../main.h" diff --git a/src/UnitTests/Matrices/SparseMatrixTest_ChunkedEllpack.cpp b/src/UnitTests/Matrices/Legacy/SparseMatrixTest_ChunkedEllpack.cpp similarity index 100% rename from src/UnitTests/Matrices/SparseMatrixTest_ChunkedEllpack.cpp rename to src/UnitTests/Matrices/Legacy/SparseMatrixTest_ChunkedEllpack.cpp diff --git a/src/UnitTests/Matrices/SparseMatrixTest_ChunkedEllpack.cu b/src/UnitTests/Matrices/Legacy/SparseMatrixTest_ChunkedEllpack.cu similarity index 100% rename from src/UnitTests/Matrices/SparseMatrixTest_ChunkedEllpack.cu rename to src/UnitTests/Matrices/Legacy/SparseMatrixTest_ChunkedEllpack.cu diff --git a/src/UnitTests/Matrices/SparseMatrixTest_ChunkedEllpack.h b/src/UnitTests/Matrices/Legacy/SparseMatrixTest_ChunkedEllpack.h similarity index 87% rename from src/UnitTests/Matrices/SparseMatrixTest_ChunkedEllpack.h rename to src/UnitTests/Matrices/Legacy/SparseMatrixTest_ChunkedEllpack.h index 6909b53a5304df75aa021484402f1c3986ec9b5f..5d304bde3fff44ffdaf3ccbd812412faa2738824 100644 --- a/src/UnitTests/Matrices/SparseMatrixTest_ChunkedEllpack.h +++ b/src/UnitTests/Matrices/Legacy/SparseMatrixTest_ChunkedEllpack.h @@ -8,7 +8,7 @@ /* See Copyright Notice in tnl/Copyright */ -#include <TNL/Matrices/ChunkedEllpack.h> +#include <TNL/Matrices/Legacy/ChunkedEllpack.h> #include "SparseMatrixTest.hpp" #include <iostream> @@ -28,10 +28,6 @@ protected: // types for which MatrixTest is instantiated using ChEllpackMatrixTypes = ::testing::Types < - TNL::Matrices::ChunkedEllpack< int, TNL::Devices::Host, short >, - TNL::Matrices::ChunkedEllpack< long, TNL::Devices::Host, short >, - TNL::Matrices::ChunkedEllpack< float, TNL::Devices::Host, short >, - TNL::Matrices::ChunkedEllpack< double, TNL::Devices::Host, short >, TNL::Matrices::ChunkedEllpack< int, TNL::Devices::Host, int >, TNL::Matrices::ChunkedEllpack< long, TNL::Devices::Host, int >, TNL::Matrices::ChunkedEllpack< float, TNL::Devices::Host, int >, @@ -41,11 +37,7 @@ using ChEllpackMatrixTypes = ::testing::Types 
TNL::Matrices::ChunkedEllpack< float, TNL::Devices::Host, long >, TNL::Matrices::ChunkedEllpack< double, TNL::Devices::Host, long > #ifdef HAVE_CUDA - ,TNL::Matrices::ChunkedEllpack< int, TNL::Devices::Cuda, short >, - TNL::Matrices::ChunkedEllpack< long, TNL::Devices::Cuda, short >, - TNL::Matrices::ChunkedEllpack< float, TNL::Devices::Cuda, short >, - TNL::Matrices::ChunkedEllpack< double, TNL::Devices::Cuda, short >, - TNL::Matrices::ChunkedEllpack< int, TNL::Devices::Cuda, int >, + ,TNL::Matrices::ChunkedEllpack< int, TNL::Devices::Cuda, int >, TNL::Matrices::ChunkedEllpack< long, TNL::Devices::Cuda, int >, TNL::Matrices::ChunkedEllpack< float, TNL::Devices::Cuda, int >, TNL::Matrices::ChunkedEllpack< double, TNL::Devices::Cuda, int >, @@ -144,4 +136,4 @@ TYPED_TEST( ChunkedEllpackMatrixTest, printTest ) #endif -#include "../main.h" +#include "../../main.h" diff --git a/src/UnitTests/Matrices/Legacy/SparseMatrixTest_Ellpack.cpp b/src/UnitTests/Matrices/Legacy/SparseMatrixTest_Ellpack.cpp new file mode 100644 index 0000000000000000000000000000000000000000..c454706f0b1d437b798f2d7a1e93ccf4c0291d3f --- /dev/null +++ b/src/UnitTests/Matrices/Legacy/SparseMatrixTest_Ellpack.cpp @@ -0,0 +1 @@ +#include "SparseMatrixTest_Ellpack.h" diff --git a/src/UnitTests/Matrices/Legacy/SparseMatrixTest_Ellpack.cu b/src/UnitTests/Matrices/Legacy/SparseMatrixTest_Ellpack.cu new file mode 100644 index 0000000000000000000000000000000000000000..c454706f0b1d437b798f2d7a1e93ccf4c0291d3f --- /dev/null +++ b/src/UnitTests/Matrices/Legacy/SparseMatrixTest_Ellpack.cu @@ -0,0 +1 @@ +#include "SparseMatrixTest_Ellpack.h" diff --git a/src/UnitTests/Matrices/Legacy/SparseMatrixTest_Ellpack.h b/src/UnitTests/Matrices/Legacy/SparseMatrixTest_Ellpack.h new file mode 100644 index 0000000000000000000000000000000000000000..bb9fe4fc76e49a408a6a370ab5b1b9b9c1e56660 --- /dev/null +++ b/src/UnitTests/Matrices/Legacy/SparseMatrixTest_Ellpack.h @@ -0,0 +1,131 @@ +/*************************************************************************** + SparseMatrixTest_Ellpack.h - description + ------------------- + begin : Nov 2, 2018 + copyright : (C) 2018 by Tomas Oberhuber et al. 
+ email : tomas.oberhuber@fjfi.cvut.cz + ***************************************************************************/ + +/* See Copyright Notice in tnl/Copyright */ + +#include <TNL/Matrices/Legacy/Ellpack.h> + +#include "SparseMatrixTest.hpp" +#include <iostream> + +#ifdef HAVE_GTEST +#include <gtest/gtest.h> + +// test fixture for typed tests +template< typename Matrix > +class EllpackMatrixTest : public ::testing::Test +{ +protected: + using EllpackMatrixType = Matrix; +}; + +// types for which MatrixTest is instantiated +using EllpackMatrixTypes = ::testing::Types +< + TNL::Matrices::Ellpack< int, TNL::Devices::Host, int >, + TNL::Matrices::Ellpack< long, TNL::Devices::Host, int >, + TNL::Matrices::Ellpack< float, TNL::Devices::Host, int >, + TNL::Matrices::Ellpack< double, TNL::Devices::Host, int >, + TNL::Matrices::Ellpack< int, TNL::Devices::Host, long >, + TNL::Matrices::Ellpack< long, TNL::Devices::Host, long >, + TNL::Matrices::Ellpack< float, TNL::Devices::Host, long >, + TNL::Matrices::Ellpack< double, TNL::Devices::Host, long > +#ifdef HAVE_CUDA + ,TNL::Matrices::Ellpack< int, TNL::Devices::Cuda, int >, + TNL::Matrices::Ellpack< long, TNL::Devices::Cuda, int >, + TNL::Matrices::Ellpack< float, TNL::Devices::Cuda, int >, + TNL::Matrices::Ellpack< double, TNL::Devices::Cuda, int >, + TNL::Matrices::Ellpack< int, TNL::Devices::Cuda, long >, + TNL::Matrices::Ellpack< long, TNL::Devices::Cuda, long >, + TNL::Matrices::Ellpack< float, TNL::Devices::Cuda, long >, + TNL::Matrices::Ellpack< double, TNL::Devices::Cuda, long > +#endif +>; + +TYPED_TEST_SUITE( EllpackMatrixTest, EllpackMatrixTypes ); + +TYPED_TEST( EllpackMatrixTest, setDimensionsTest ) +{ + using EllpackMatrixType = typename TestFixture::EllpackMatrixType; + + test_SetDimensions< EllpackMatrixType >(); +} + +//TYPED_TEST( EllpackMatrixTest, setCompressedRowLengthsTest ) +//{ +//// using EllpackMatrixType = typename TestFixture::EllpackMatrixType; +// +//// test_SetCompressedRowLengths< EllpackMatrixType >(); +// +// bool testRan = false; +// EXPECT_TRUE( testRan ); +// std::cout << "\nTEST DID NOT RUN. NOT WORKING.\n\n"; +// std::cout << " This test is dependent on the input format. 
\n"; +// std::cout << " Almost every format allocates elements per row differently.\n\n"; +// std::cout << "\n TODO: Finish implementation of getNonZeroRowLength (Only non-zero elements, not the number of allocated elements.)\n\n"; +//} + +TYPED_TEST( EllpackMatrixTest, setLikeTest ) +{ + using EllpackMatrixType = typename TestFixture::EllpackMatrixType; + + test_SetLike< EllpackMatrixType, EllpackMatrixType >(); +} + +TYPED_TEST( EllpackMatrixTest, resetTest ) +{ + using EllpackMatrixType = typename TestFixture::EllpackMatrixType; + + test_Reset< EllpackMatrixType >(); +} + +TYPED_TEST( EllpackMatrixTest, setElementTest ) +{ + using EllpackMatrixType = typename TestFixture::EllpackMatrixType; + + test_SetElement< EllpackMatrixType >(); +} + +TYPED_TEST( EllpackMatrixTest, addElementTest ) +{ + using EllpackMatrixType = typename TestFixture::EllpackMatrixType; + + test_AddElement< EllpackMatrixType >(); +} + +TYPED_TEST( EllpackMatrixTest, setRowTest ) +{ + using EllpackMatrixType = typename TestFixture::EllpackMatrixType; + + test_SetRow< EllpackMatrixType >(); +} + +TYPED_TEST( EllpackMatrixTest, vectorProductTest ) +{ + using EllpackMatrixType = typename TestFixture::EllpackMatrixType; + + test_VectorProduct< EllpackMatrixType >(); +} + +TYPED_TEST( EllpackMatrixTest, saveAndLoadTest ) +{ + using EllpackMatrixType = typename TestFixture::EllpackMatrixType; + + test_SaveAndLoad< EllpackMatrixType >( "test_SparseMatrixTest_Ellpack" ); +} + +TYPED_TEST( EllpackMatrixTest, printTest ) +{ + using EllpackMatrixType = typename TestFixture::EllpackMatrixType; + + test_Print< EllpackMatrixType >(); +} + +#endif + +#include "../../main.h" diff --git a/src/UnitTests/Matrices/Legacy/SparseMatrixTest_SlicedEllpack.cpp b/src/UnitTests/Matrices/Legacy/SparseMatrixTest_SlicedEllpack.cpp new file mode 100644 index 0000000000000000000000000000000000000000..40e2e94b81ca64051ddceee82f46dd2d20e66e42 --- /dev/null +++ b/src/UnitTests/Matrices/Legacy/SparseMatrixTest_SlicedEllpack.cpp @@ -0,0 +1 @@ +#include "SparseMatrixTest_SlicedEllpack.h" diff --git a/src/UnitTests/Matrices/Legacy/SparseMatrixTest_SlicedEllpack.cu b/src/UnitTests/Matrices/Legacy/SparseMatrixTest_SlicedEllpack.cu new file mode 100644 index 0000000000000000000000000000000000000000..40e2e94b81ca64051ddceee82f46dd2d20e66e42 --- /dev/null +++ b/src/UnitTests/Matrices/Legacy/SparseMatrixTest_SlicedEllpack.cu @@ -0,0 +1 @@ +#include "SparseMatrixTest_SlicedEllpack.h" diff --git a/src/UnitTests/Matrices/Legacy/SparseMatrixTest_SlicedEllpack.h b/src/UnitTests/Matrices/Legacy/SparseMatrixTest_SlicedEllpack.h new file mode 100644 index 0000000000000000000000000000000000000000..8b39583847d765ad2ede230a00eea74caec119f7 --- /dev/null +++ b/src/UnitTests/Matrices/Legacy/SparseMatrixTest_SlicedEllpack.h @@ -0,0 +1,136 @@ +/*************************************************************************** + SparseMatrixTest_SlicedEllpack.h - description + ------------------- + begin : Dec 9, 2019 + copyright : (C) 2019 by Tomas Oberhuber et al. 
+ email : tomas.oberhuber@fjfi.cvut.cz + ***************************************************************************/ + +/* See Copyright Notice in tnl/Copyright */ + +#include <TNL/Matrices/Legacy/SlicedEllpack.h> + + +#include "SparseMatrixTest.hpp" +#include <iostream> + +#ifdef HAVE_GTEST +#include <gtest/gtest.h> + +// test fixture for typed tests +template< typename Matrix > +class SlicedEllpackMatrixTest : public ::testing::Test +{ +protected: + using SlicedEllpackMatrixType = Matrix; +}; + +template< typename Real, typename Device, typename Index > +using SlicedEllpackType = TNL::Matrices::SlicedEllpack< Real, Device, Index, 32 >; + + +// types for which MatrixTest is instantiated +using SlicedEllpackMatrixTypes = ::testing::Types +< + SlicedEllpackType< int, TNL::Devices::Host, int >, + SlicedEllpackType< long, TNL::Devices::Host, int >, + SlicedEllpackType< float, TNL::Devices::Host, int >, + SlicedEllpackType< double, TNL::Devices::Host, int >, + SlicedEllpackType< int, TNL::Devices::Host, long >, + SlicedEllpackType< long, TNL::Devices::Host, long >, + SlicedEllpackType< float, TNL::Devices::Host, long >, + SlicedEllpackType< double, TNL::Devices::Host, long > +#ifdef HAVE_CUDA + ,SlicedEllpackType< int, TNL::Devices::Cuda, int >, + SlicedEllpackType< long, TNL::Devices::Cuda, int >, + SlicedEllpackType< float, TNL::Devices::Cuda, int >, + SlicedEllpackType< double, TNL::Devices::Cuda, int >, + SlicedEllpackType< int, TNL::Devices::Cuda, long >, + SlicedEllpackType< long, TNL::Devices::Cuda, long >, + SlicedEllpackType< float, TNL::Devices::Cuda, long >, + SlicedEllpackType< double, TNL::Devices::Cuda, long > +#endif +>; + +TYPED_TEST_SUITE( SlicedEllpackMatrixTest, SlicedEllpackMatrixTypes); + +TYPED_TEST( SlicedEllpackMatrixTest, setDimensionsTest ) +{ + using SlicedEllpackMatrixType = typename TestFixture::SlicedEllpackMatrixType; + + test_SetDimensions< SlicedEllpackMatrixType >(); +} + +//TYPED_TEST( SlicedEllpackMatrixTest, setCompressedRowLengthsTest ) +//{ +//// using SlicedEllpackMatrixType = typename TestFixture::SlicedEllpackMatrixType; +// +//// test_SetCompressedRowLengths< SlicedEllpackMatrixType >(); +// +// bool testRan = false; +// EXPECT_TRUE( testRan ); +// std::cout << "\nTEST DID NOT RUN. NOT WORKING.\n\n"; +// std::cout << " This test is dependent on the input format. 
\n"; +// std::cout << " Almost every format allocates elements per row differently.\n\n"; +// std::cout << "\n TODO: Finish implementation of getNonZeroRowLength (Only non-zero elements, not the number of allocated elements.)\n\n"; +//} + +TYPED_TEST( SlicedEllpackMatrixTest, setLikeTest ) +{ + using SlicedEllpackMatrixType = typename TestFixture::SlicedEllpackMatrixType; + + test_SetLike< SlicedEllpackMatrixType, SlicedEllpackMatrixType >(); +} + +TYPED_TEST( SlicedEllpackMatrixTest, resetTest ) +{ + using SlicedEllpackMatrixType = typename TestFixture::SlicedEllpackMatrixType; + + test_Reset< SlicedEllpackMatrixType >(); +} + +TYPED_TEST( SlicedEllpackMatrixTest, setElementTest ) +{ + using SlicedEllpackMatrixType = typename TestFixture::SlicedEllpackMatrixType; + + test_SetElement< SlicedEllpackMatrixType >(); +} + +TYPED_TEST( SlicedEllpackMatrixTest, addElementTest ) +{ + using SlicedEllpackMatrixType = typename TestFixture::SlicedEllpackMatrixType; + + test_AddElement< SlicedEllpackMatrixType >(); +} + +TYPED_TEST( SlicedEllpackMatrixTest, setRowTest ) +{ + using SlicedEllpackMatrixType = typename TestFixture::SlicedEllpackMatrixType; + + test_SetRow< SlicedEllpackMatrixType >(); +} + +TYPED_TEST( SlicedEllpackMatrixTest, vectorProductTest ) +{ + using SlicedEllpackMatrixType = typename TestFixture::SlicedEllpackMatrixType; + + test_VectorProduct< SlicedEllpackMatrixType >(); +} + +TYPED_TEST( SlicedEllpackMatrixTest, saveAndLoadTest ) +{ + using SlicedEllpackMatrixType = typename TestFixture::SlicedEllpackMatrixType; + + test_SaveAndLoad< SlicedEllpackMatrixType >( "test_SparseMatrixTest_SlicedEllpack_segments" ); +} + +TYPED_TEST( SlicedEllpackMatrixTest, printTest ) +{ + using SlicedEllpackMatrixType = typename TestFixture::SlicedEllpackMatrixType; + + test_Print< SlicedEllpackMatrixType >(); +} + +#endif + +#include "../../main.h" diff --git a/src/UnitTests/Matrices/MultidiagonalMatrixTest.cpp b/src/UnitTests/Matrices/MultidiagonalMatrixTest.cpp new file mode 100644 index 0000000000000000000000000000000000000000..639f1964086784bfdb174443a5f2554b703e511b --- /dev/null +++ b/src/UnitTests/Matrices/MultidiagonalMatrixTest.cpp @@ -0,0 +1,11 @@ +/*************************************************************************** + MultidiagonalMatrixTest.cpp - description + ------------------- + begin : Jan 8, 2020 + copyright : (C) 2020 by Tomas Oberhuber et al. + email : tomas.oberhuber@fjfi.cvut.cz + ***************************************************************************/ + +/* See Copyright Notice in tnl/Copyright */ + +#include "MultidiagonalMatrixTest.h" \ No newline at end of file diff --git a/src/UnitTests/Matrices/MultidiagonalMatrixTest.cu b/src/UnitTests/Matrices/MultidiagonalMatrixTest.cu new file mode 100644 index 0000000000000000000000000000000000000000..53541edbd003d084e1b50b742472beec086d87fb --- /dev/null +++ b/src/UnitTests/Matrices/MultidiagonalMatrixTest.cu @@ -0,0 +1,11 @@ +/*************************************************************************** + MultidiagonalMatrixTest.cu - description + ------------------- + begin : Jan 8, 2020 + copyright : (C) 2020 by Tomas Oberhuber et al. 
+ email : tomas.oberhuber@fjfi.cvut.cz + ***************************************************************************/ + +/* See Copyright Notice in tnl/Copyright */ + +#include "MultidiagonalMatrixTest.h" \ No newline at end of file diff --git a/src/UnitTests/Matrices/MultidiagonalMatrixTest.h b/src/UnitTests/Matrices/MultidiagonalMatrixTest.h new file mode 100644 index 0000000000000000000000000000000000000000..49bcfa11c44d6adb5be20013a2bd2064febd208e --- /dev/null +++ b/src/UnitTests/Matrices/MultidiagonalMatrixTest.h @@ -0,0 +1,1596 @@ +/*************************************************************************** + MultidiagonalMatrixTest.h - description + ------------------- + begin : Jan 8, 2020 + copyright : (C) 2020 by Tomas Oberhuber et al. + email : tomas.oberhuber@fjfi.cvut.cz + ***************************************************************************/ + +/* See Copyright Notice in tnl/Copyright */ + +#include <sstream> +#include <TNL/Devices/Host.h> +#include <TNL/Matrices/Matrix.h> +#include <TNL/Matrices/Multidiagonal.h> +#include <TNL/Containers/Array.h> + +#include <TNL/Containers/Vector.h> +#include <TNL/Containers/VectorView.h> +#include <TNL/Math.h> +#include <iostream> + +using Multidiagonal_host_float = TNL::Matrices::Multidiagonal< float, TNL::Devices::Host, int >; +using Multidiagonal_host_int = TNL::Matrices::Multidiagonal< int, TNL::Devices::Host, int >; + +using Multidiagonal_cuda_float = TNL::Matrices::Multidiagonal< float, TNL::Devices::Cuda, int >; +using Multidiagonal_cuda_int = TNL::Matrices::Multidiagonal< int, TNL::Devices::Cuda, int >; + +static const char* TEST_FILE_NAME = "test_MultidiagonalMatrixTest.tnl"; + +#ifdef HAVE_GTEST +#include <type_traits> + +#include <gtest/gtest.h> + +void test_GetSerializationType() +{ + EXPECT_EQ( ( TNL::Matrices::Multidiagonal< float, TNL::Devices::Host, int, true >::getSerializationType() ), TNL::String( "Matrices::Multidiagonal< float, [any_device], int, true, [any_allocator], [any_allocator] >" ) ); + EXPECT_EQ( ( TNL::Matrices::Multidiagonal< int, TNL::Devices::Host, int, true >::getSerializationType() ), TNL::String( "Matrices::Multidiagonal< int, [any_device], int, true, [any_allocator], [any_allocator] >" ) ); + EXPECT_EQ( ( TNL::Matrices::Multidiagonal< float, TNL::Devices::Cuda, int, true >::getSerializationType() ), TNL::String( "Matrices::Multidiagonal< float, [any_device], int, true, [any_allocator], [any_allocator] >" ) ); + EXPECT_EQ( ( TNL::Matrices::Multidiagonal< int, TNL::Devices::Cuda, int, true >::getSerializationType() ), TNL::String( "Matrices::Multidiagonal< int, [any_device], int, true, [any_allocator], [any_allocator] >" ) ); + EXPECT_EQ( ( TNL::Matrices::Multidiagonal< float, TNL::Devices::Host, int, false >::getSerializationType() ), TNL::String( "Matrices::Multidiagonal< float, [any_device], int, false, [any_allocator], [any_allocator] >" ) ); + EXPECT_EQ( ( TNL::Matrices::Multidiagonal< int, TNL::Devices::Host, int, false >::getSerializationType() ), TNL::String( "Matrices::Multidiagonal< int, [any_device], int, false, [any_allocator], [any_allocator] >" ) ); + EXPECT_EQ( ( TNL::Matrices::Multidiagonal< float, TNL::Devices::Cuda, int, false >::getSerializationType() ), TNL::String( "Matrices::Multidiagonal< float, [any_device], int, false, [any_allocator], [any_allocator] >" ) ); + EXPECT_EQ( ( TNL::Matrices::Multidiagonal< int, TNL::Devices::Cuda, int, false >::getSerializationType() ), TNL::String( "Matrices::Multidiagonal< int, [any_device], int, false, [any_allocator], [any_allocator] >" ) 
); +} + +template< typename Matrix > +void test_SetDimensions() +{ + using RealType = typename Matrix::RealType; + using DeviceType = typename Matrix::DeviceType; + using IndexType = typename Matrix::IndexType; + using DiagonalsShiftsType = typename Matrix::DiagonalsShiftsType; + + const IndexType rows = 9; + const IndexType cols = 8; + const DiagonalsShiftsType diagonalsShifts{ -3, -1, 0, 2, 4 }; + + Matrix m; + m.setDimensions( rows, cols, diagonalsShifts ); + + EXPECT_EQ( m.getRows(), 9 ); + EXPECT_EQ( m.getColumns(), 8 ); +} + + +template< typename Matrix1, typename Matrix2 > +void test_SetLike() +{ + using RealType = typename Matrix1::RealType; + using DeviceType = typename Matrix1::DeviceType; + using IndexType = typename Matrix1::IndexType; + using DiagonalsShiftsType = typename Matrix1::DiagonalsShiftsType; + + const IndexType rows = 8; + const IndexType cols = 7; + const DiagonalsShiftsType diagonalsShifts{ -3, -1, 0, 2, 4 }; + + Matrix1 m1; + m1.setDimensions( rows + 1, cols + 2, diagonalsShifts ); + + Matrix2 m2; + m2.setDimensions( rows, cols, diagonalsShifts ); + + m1.setLike( m2 ); + + EXPECT_EQ( m1.getRows(), m2.getRows() ); + EXPECT_EQ( m1.getColumns(), m2.getColumns() ); +} + +template< typename Matrix > +void test_GetNonemptyRowsCount() +{ + using RealType = typename Matrix::RealType; + using DeviceType = typename Matrix::DeviceType; + using IndexType = typename Matrix::IndexType; + using DiagonalsShiftsType = typename Matrix::DiagonalsShiftsType; + + /* + * Sets up the following 5x8 matrix: + * + * / 1 0 0 1 0 1 0 0 \ + * | 0 1 0 0 1 0 1 0 | + * | 1 0 1 0 0 1 0 1 | + * | 0 1 0 1 0 0 1 0 | + * \ 0 0 1 0 1 0 0 1 / + */ + Matrix m1( 5, 8, DiagonalsShiftsType({ -2, 0, 3, 5 }) ); + m1.setValue( 1.0 ); + EXPECT_EQ( m1.getNonemptyRowsCount(), 5 ); + + /* + * Sets up the following 5x5 matrix: + * + * / 1 0 0 1 0 \ + * | 0 1 0 0 1 | + * | 1 0 1 0 0 | + * | 0 1 0 1 0 | + * \ 0 0 1 0 1 / + */ + Matrix m2( 5, 5, DiagonalsShiftsType({ -2, 0, 3, 5 }) ); + m2.setValue( 1.0 ); + EXPECT_EQ( m2.getNonemptyRowsCount(), 5 ); + + /* + * Sets up the following 8x5 matrix: + * + * / 1 0 0 1 0 \ + * | 0 1 0 0 1 | + * | 1 0 1 0 0 | + * | 0 1 0 1 0 | + * | 0 0 1 0 1 | + * | 0 0 0 1 0 | + * | 0 0 0 0 1 | + * \ 0 0 0 0 0 / + */ + Matrix m3( 8, 5, DiagonalsShiftsType({ -2, 0, 3, 5 }) ); + m3.setValue( 1.0 ); + EXPECT_EQ( m3.getNonemptyRowsCount(), 7 ); +} + +template< typename Matrix > +void test_GetCompressedRowLengths() +{ + using RealType = typename Matrix::RealType; + using DeviceType = typename Matrix::DeviceType; + using IndexType = typename Matrix::IndexType; + using DiagonalsShiftsType = typename Matrix::DiagonalsShiftsType; + + /* + * Sets up the following 8x8 matrix: + * + * / 0 0 0 1 0 1 0 0 \ -> 2 + * | 0 1 0 0 1 0 1 0 | -> 3 + * | 1 0 1 0 0 1 0 1 | -> 4 + * | 0 1 0 1 0 0 1 0 | -> 3 + * | 0 0 1 0 1 0 0 1 | -> 3 + * | 0 0 0 1 0 1 0 0 | -> 2 + * | 0 0 0 0 1 0 1 0 | -> 2 + * \ 0 0 0 0 0 1 0 0 / -> 1 + */ + + const IndexType rows = 8; + const IndexType cols = 8; + + Matrix m( rows, cols, DiagonalsShiftsType({ -2, 0, 3, 5 }) ); + m.setValue( 1.0 ); + m.setElement( 0, 0, 0.0 ); + m.setElement( 7, 7, 0.0 ); + + typename Matrix::CompressedRowLengthsVector rowLengths( rows ); + rowLengths = 0; + m.getCompressedRowLengths( rowLengths ); + typename Matrix::CompressedRowLengthsVector correctRowLengths{ 2, 3, 4, 3, 3, 2, 2, 1 }; + EXPECT_EQ( rowLengths, correctRowLengths ); +} + +template< typename Matrix > +void test_GetAllocatedElementsCount() +{ + using RealType = typename Matrix::RealType; + using 
DeviceType = typename Matrix::DeviceType; + using IndexType = typename Matrix::IndexType; + using DiagonalsShiftsType = typename Matrix::DiagonalsShiftsType; + + const IndexType rows = 7; + const IndexType cols = 6; + + Matrix m1( 7, 6, DiagonalsShiftsType( { -2, 0, 3, 5 } ) ); + EXPECT_EQ( m1.getAllocatedElementsCount(), 28 ); + + Matrix m2( 8, 6, DiagonalsShiftsType( { -2, 0, 3, 5 } ) ); + EXPECT_EQ( m2.getAllocatedElementsCount(), 32 ); + + Matrix m3( 9, 6, DiagonalsShiftsType( { -2, 0, 3, 5 } ) ); + EXPECT_EQ( m3.getAllocatedElementsCount(), 32 ); +} + +template< typename Matrix > +void test_GetNumberOfNonzeroMatrixElements() +{ + using RealType = typename Matrix::RealType; + using DeviceType = typename Matrix::DeviceType; + using IndexType = typename Matrix::IndexType; + using DiagonalsShiftsType = typename Matrix::DiagonalsShiftsType; + + /* + * Sets up the following 7x6 matrix: + * + * / 0 0 1 0 1 0 \ -> 2 + * | 0 1 0 1 0 1 | -> 3 + * | 0 0 1 0 1 0 | -> 2 + * | 1 0 0 1 0 1 | -> 3 + * | 0 1 0 0 1 0 | -> 2 + * | 0 0 1 0 0 1 | -> 2 + * \ 0 0 0 1 0 0 / -> 1 + * ---- + * 15 + */ + const IndexType rows = 7; + const IndexType cols = 6; + + Matrix m( rows, cols, DiagonalsShiftsType( { -3, 0, 2, 4 } ) ); + m.setValue( 1.0 ); + m.setElement( 0, 0, 0.0 ); + + EXPECT_EQ( m.getNumberOfNonzeroMatrixElements(), 15 ); +} + +template< typename Matrix > +void test_Reset() +{ + using RealType = typename Matrix::RealType; + using DeviceType = typename Matrix::DeviceType; + using IndexType = typename Matrix::IndexType; + using DiagonalsShiftsType = typename Matrix::DiagonalsShiftsType; + + /* + * Sets up the following 5x4 matrix: + * + * / 0 0 0 0 \ + * | 0 0 0 0 | + * | 0 0 0 0 | + * | 0 0 0 0 | + * \ 0 0 0 0 / + */ + const IndexType rows = 5; + const IndexType cols = 4; + + Matrix m( rows, cols, DiagonalsShiftsType( { 0, 1, 2, 4 } ) ); + m.reset(); + + EXPECT_EQ( m.getRows(), 0 ); + EXPECT_EQ( m.getColumns(), 0 ); +} + +template< typename Matrix > +void test_SetValue() +{ + using RealType = typename Matrix::RealType; + using DeviceType = typename Matrix::DeviceType; + using IndexType = typename Matrix::IndexType; + using DiagonalsShiftsType = typename Matrix::DiagonalsShiftsType; + + /* + * Sets up the following 7x6 matrix: + * + * / 1 0 1 0 1 0 \ + * | 0 1 0 1 0 1 | + * | 0 0 1 0 1 0 | + * | 1 0 0 1 0 1 | + * | 0 1 0 0 1 0 | + * | 0 0 1 0 0 1 | + * \ 0 0 0 1 0 0 / + */ + const IndexType rows = 7; + const IndexType cols = 6; + + Matrix m( rows, cols, DiagonalsShiftsType( { -3, 0, 2, 4 } ) ); + m.setValue( 1.0 ); + + EXPECT_EQ( m.getElement( 0, 0 ), 1 ); + EXPECT_EQ( m.getElement( 0, 1 ), 0 ); + EXPECT_EQ( m.getElement( 0, 2 ), 1 ); + EXPECT_EQ( m.getElement( 0, 3 ), 0 ); + EXPECT_EQ( m.getElement( 0, 4 ), 1 ); + EXPECT_EQ( m.getElement( 0, 5 ), 0 ); + + EXPECT_EQ( m.getElement( 1, 0 ), 0 ); + EXPECT_EQ( m.getElement( 1, 1 ), 1 ); + EXPECT_EQ( m.getElement( 1, 2 ), 0 ); + EXPECT_EQ( m.getElement( 1, 3 ), 1 ); + EXPECT_EQ( m.getElement( 1, 4 ), 0 ); + EXPECT_EQ( m.getElement( 1, 5 ), 1 ); + + EXPECT_EQ( m.getElement( 2, 0 ), 0 ); + EXPECT_EQ( m.getElement( 2, 1 ), 0 ); + EXPECT_EQ( m.getElement( 2, 2 ), 1 ); + EXPECT_EQ( m.getElement( 2, 3 ), 0 ); + EXPECT_EQ( m.getElement( 2, 4 ), 1 ); + EXPECT_EQ( m.getElement( 2, 5 ), 0 ); + + EXPECT_EQ( m.getElement( 3, 0 ), 1 ); + EXPECT_EQ( m.getElement( 3, 1 ), 0 ); + EXPECT_EQ( m.getElement( 3, 2 ), 0 ); + EXPECT_EQ( m.getElement( 3, 3 ), 1 ); + EXPECT_EQ( m.getElement( 3, 4 ), 0 ); + EXPECT_EQ( m.getElement( 3, 5 ), 1 ); + + EXPECT_EQ( m.getElement( 4, 0 ), 0 
); + EXPECT_EQ( m.getElement( 4, 1 ), 1 ); + EXPECT_EQ( m.getElement( 4, 2 ), 0 ); + EXPECT_EQ( m.getElement( 4, 3 ), 0 ); + EXPECT_EQ( m.getElement( 4, 4 ), 1 ); + EXPECT_EQ( m.getElement( 4, 5 ), 0 ); + + EXPECT_EQ( m.getElement( 5, 0 ), 0 ); + EXPECT_EQ( m.getElement( 5, 1 ), 0 ); + EXPECT_EQ( m.getElement( 5, 2 ), 1 ); + EXPECT_EQ( m.getElement( 5, 3 ), 0 ); + EXPECT_EQ( m.getElement( 5, 4 ), 0 ); + EXPECT_EQ( m.getElement( 5, 5 ), 1 ); + + EXPECT_EQ( m.getElement( 6, 0 ), 0 ); + EXPECT_EQ( m.getElement( 6, 1 ), 0 ); + EXPECT_EQ( m.getElement( 6, 2 ), 0 ); + EXPECT_EQ( m.getElement( 6, 3 ), 1 ); + EXPECT_EQ( m.getElement( 6, 4 ), 0 ); + EXPECT_EQ( m.getElement( 6, 5 ), 0 ); +} + +template< typename Matrix > +void test_SetElement() +{ + using RealType = typename Matrix::RealType; + using DeviceType = typename Matrix::DeviceType; + using IndexType = typename Matrix::IndexType; + using DiagonalsShiftsType = typename Matrix::DiagonalsShiftsType; + + /* + * Sets up the following 5x5 matrix: + * + * / 1 2 0 0 5 \ + * | 0 7 8 0 0 | + * | 0 0 13 14 0 | + * | 16 0 0 19 20 | + * \ 0 22 0 0 25 / + */ + const IndexType rows = 5; + const IndexType cols = 5; + DiagonalsShiftsType diagonals{-3, 0, 1, 4 }; + Matrix m( rows, cols, diagonals ); + + RealType value = 1; + for( IndexType i = 0; i < rows; i++ ) + for( IndexType j = 0; j < cols; j++ ) + if( diagonals.containsValue( j - i ) ) + m.setElement( i, j, value++ ); + else + { + EXPECT_THROW( m.setElement( i, j, value++ ), std::logic_error ); + } + + EXPECT_EQ( m.getElement( 0, 0 ), 1 ); + EXPECT_EQ( m.getElement( 0, 1 ), 2 ); + EXPECT_EQ( m.getElement( 0, 2 ), 0 ); + EXPECT_EQ( m.getElement( 0, 3 ), 0 ); + EXPECT_EQ( m.getElement( 0, 4 ), 5 ); + + EXPECT_EQ( m.getElement( 1, 0 ), 0 ); + EXPECT_EQ( m.getElement( 1, 1 ), 7 ); + EXPECT_EQ( m.getElement( 1, 2 ), 8 ); + EXPECT_EQ( m.getElement( 1, 3 ), 0 ); + EXPECT_EQ( m.getElement( 1, 4 ), 0 ); + + EXPECT_EQ( m.getElement( 2, 0 ), 0 ); + EXPECT_EQ( m.getElement( 2, 1 ), 0 ); + EXPECT_EQ( m.getElement( 2, 2 ), 13 ); + EXPECT_EQ( m.getElement( 2, 3 ), 14 ); + EXPECT_EQ( m.getElement( 2, 4 ), 0 ); + + EXPECT_EQ( m.getElement( 3, 0 ), 16 ); + EXPECT_EQ( m.getElement( 3, 1 ), 0 ); + EXPECT_EQ( m.getElement( 3, 2 ), 0 ); + EXPECT_EQ( m.getElement( 3, 3 ), 19 ); + EXPECT_EQ( m.getElement( 3, 4 ), 20 ); + + EXPECT_EQ( m.getElement( 4, 0 ), 0 ); + EXPECT_EQ( m.getElement( 4, 1 ), 22 ); + EXPECT_EQ( m.getElement( 4, 2 ), 0 ); + EXPECT_EQ( m.getElement( 4, 3 ), 0 ); + EXPECT_EQ( m.getElement( 4, 4 ), 25 ); +} + +template< typename Matrix > +void test_AddElement() +{ + using RealType = typename Matrix::RealType; + using DeviceType = typename Matrix::DeviceType; + using IndexType = typename Matrix::IndexType; + using DiagonalsShiftsType = typename Matrix::DiagonalsShiftsType; + + /* + * Sets up the following 5x5 matrix: + * + * / 1 2 0 0 5 \ + * | 0 7 8 0 0 | + * | 0 0 13 14 0 | + * | 0 0 0 19 20 | + * \ 0 0 0 0 25 / + */ + const IndexType rows = 5; + const IndexType cols = 5; + DiagonalsShiftsType diagonals{-3, 0, 1, 4 }; + Matrix m( rows, cols, diagonals ); + + RealType value = 1; + for( IndexType i = 0; i < rows; i++ ) + for( IndexType j = 0; j < cols; j++ ) + if( diagonals.containsValue( j - i ) ) + { + if( j >= i ) + m.setElement( i, j, value ); + value++; + } + else + { + EXPECT_THROW( m.setElement( i, j, value++ ), std::logic_error ); + } + + // Check the added elements + EXPECT_EQ( m.getElement( 0, 0 ), 1 ); + EXPECT_EQ( m.getElement( 0, 1 ), 2 ); + EXPECT_EQ( m.getElement( 0, 2 ), 0 ); + EXPECT_EQ( 
m.getElement( 0, 3 ), 0 ); + EXPECT_EQ( m.getElement( 0, 4 ), 5 ); + + EXPECT_EQ( m.getElement( 1, 0 ), 0 ); + EXPECT_EQ( m.getElement( 1, 1 ), 7 ); + EXPECT_EQ( m.getElement( 1, 2 ), 8 ); + EXPECT_EQ( m.getElement( 1, 3 ), 0 ); + EXPECT_EQ( m.getElement( 1, 4 ), 0 ); + + EXPECT_EQ( m.getElement( 2, 0 ), 0 ); + EXPECT_EQ( m.getElement( 2, 1 ), 0 ); + EXPECT_EQ( m.getElement( 2, 2 ), 13 ); + EXPECT_EQ( m.getElement( 2, 3 ), 14 ); + EXPECT_EQ( m.getElement( 2, 4 ), 0 ); + + EXPECT_EQ( m.getElement( 3, 0 ), 0 ); + EXPECT_EQ( m.getElement( 3, 1 ), 0 ); + EXPECT_EQ( m.getElement( 3, 2 ), 0 ); + EXPECT_EQ( m.getElement( 3, 3 ), 19 ); + EXPECT_EQ( m.getElement( 3, 4 ), 20 ); + + EXPECT_EQ( m.getElement( 4, 0 ), 0 ); + EXPECT_EQ( m.getElement( 4, 1 ), 0 ); + EXPECT_EQ( m.getElement( 4, 2 ), 0 ); + EXPECT_EQ( m.getElement( 4, 3 ), 0 ); + EXPECT_EQ( m.getElement( 4, 4 ), 25 ); + + // Add new elements to the old elements with a multiplying factor applied to the old elements. + /* + * The following setup results in the following 6x5 matrix: + * + * / 1 2 0 0 5 \ / 1 2 0 0 5 \ / 3 6 0 0 15 \ + * | 0 7 8 0 0 | | 0 7 8 0 0 | | 0 21 24 0 0 | + * 2 * | 0 0 13 14 0 | + | 0 0 13 14 0 | = | 0 0 39 42 0 | + * | 0 0 0 19 20 | | 16 0 0 19 20 | | 16 0 0 57 60 | + * \ 0 0 0 0 25 / \ 0 22 0 0 25 / \ 0 22 0 0 75 / + * + */ + + value = 1; + RealType multiplicator = 2; + for( IndexType i = 0; i < rows; i++ ) + for( IndexType j = 0; j < cols; j++ ) + if( diagonals.containsValue( j - i ) ) + m.addElement( i, j, value++, multiplicator ); + else + { + EXPECT_THROW( m.addElement( i, j, value++, multiplicator ), std::logic_error ); + } + + EXPECT_EQ( m.getElement( 0, 0 ), 3 ); + EXPECT_EQ( m.getElement( 0, 1 ), 6 ); + EXPECT_EQ( m.getElement( 0, 2 ), 0 ); + EXPECT_EQ( m.getElement( 0, 3 ), 0 ); + EXPECT_EQ( m.getElement( 0, 4 ), 15 ); + + EXPECT_EQ( m.getElement( 1, 0 ), 0 ); + EXPECT_EQ( m.getElement( 1, 1 ), 21 ); + EXPECT_EQ( m.getElement( 1, 2 ), 24 ); + EXPECT_EQ( m.getElement( 1, 3 ), 0 ); + EXPECT_EQ( m.getElement( 1, 4 ), 0 ); + + EXPECT_EQ( m.getElement( 2, 0 ), 0 ); + EXPECT_EQ( m.getElement( 2, 1 ), 0 ); + EXPECT_EQ( m.getElement( 2, 2 ), 39 ); + EXPECT_EQ( m.getElement( 2, 3 ), 42 ); + EXPECT_EQ( m.getElement( 2, 4 ), 0 ); + + EXPECT_EQ( m.getElement( 3, 0 ), 16 ); + EXPECT_EQ( m.getElement( 3, 1 ), 0 ); + EXPECT_EQ( m.getElement( 3, 2 ), 0 ); + EXPECT_EQ( m.getElement( 3, 3 ), 57 ); + EXPECT_EQ( m.getElement( 3, 4 ), 60 ); + + EXPECT_EQ( m.getElement( 4, 0 ), 0 ); + EXPECT_EQ( m.getElement( 4, 1 ), 22 ); + EXPECT_EQ( m.getElement( 4, 2 ), 0 ); + EXPECT_EQ( m.getElement( 4, 3 ), 0 ); + EXPECT_EQ( m.getElement( 4, 4 ), 75 ); +} + +template< typename Matrix > +void test_SetRow() +{ + using RealType = typename Matrix::RealType; + using DeviceType = typename Matrix::DeviceType; + using IndexType = typename Matrix::IndexType; + using DiagonalsShiftsType = typename Matrix::DiagonalsShiftsType; + + /* + * Sets up the following 5x7 matrix: + * + * / 1 0 2 0 3 0 0 \ + * | 4 5 0 6 0 7 0 | + * | 0 8 9 0 10 0 11 | + * | 0 0 12 13 0 14 0 | + * \ 0 0 0 15 16 0 17 / + */ + const IndexType rows = 5; + const IndexType cols = 7; + + Matrix m( rows, cols, DiagonalsShiftsType({ -1, 0, 2, 4 }) ); + + auto matrix_view = m.getView(); + auto f = [=] __cuda_callable__ ( IndexType rowIdx ) mutable { + RealType values[ 5 ][ 4 ] { + { 0, 1, 2, 3 }, + { 4, 5, 6, 7 }, + { 8, 9, 10, 11 }, + { 12, 13, 14, 0 }, + { 15, 16, 17, 0 } }; + auto row = matrix_view.getRow( rowIdx ); + for( IndexType i = 0; i < 4; i++ ) + row.setElement( i, values[ 
rowIdx ][ i ] ); + }; + TNL::Algorithms::ParallelFor< DeviceType >::exec( ( IndexType) 0, rows, f ); + + EXPECT_EQ( m.getElement( 0, 0 ), 1 ); + EXPECT_EQ( m.getElement( 0, 1 ), 0 ); + EXPECT_EQ( m.getElement( 0, 2 ), 2 ); + EXPECT_EQ( m.getElement( 0, 3 ), 0 ); + EXPECT_EQ( m.getElement( 0, 4 ), 3 ); + EXPECT_EQ( m.getElement( 0, 5 ), 0 ); + EXPECT_EQ( m.getElement( 0, 6 ), 0 ); + + EXPECT_EQ( m.getElement( 1, 0 ), 4 ); + EXPECT_EQ( m.getElement( 1, 1 ), 5 ); + EXPECT_EQ( m.getElement( 1, 2 ), 0 ); + EXPECT_EQ( m.getElement( 1, 3 ), 6 ); + EXPECT_EQ( m.getElement( 1, 4 ), 0 ); + EXPECT_EQ( m.getElement( 1, 5 ), 7 ); + EXPECT_EQ( m.getElement( 1, 6 ), 0 ); + + EXPECT_EQ( m.getElement( 2, 0 ), 0 ); + EXPECT_EQ( m.getElement( 2, 1 ), 8 ); + EXPECT_EQ( m.getElement( 2, 2 ), 9 ); + EXPECT_EQ( m.getElement( 2, 3 ), 0 ); + EXPECT_EQ( m.getElement( 2, 4 ), 10 ); + EXPECT_EQ( m.getElement( 2, 5 ), 0 ); + EXPECT_EQ( m.getElement( 2, 6 ), 11 ); + + EXPECT_EQ( m.getElement( 3, 0 ), 0 ); + EXPECT_EQ( m.getElement( 3, 1 ), 0 ); + EXPECT_EQ( m.getElement( 3, 2 ), 12 ); + EXPECT_EQ( m.getElement( 3, 3 ), 13 ); + EXPECT_EQ( m.getElement( 3, 4 ), 0 ); + EXPECT_EQ( m.getElement( 3, 5 ), 14 ); + EXPECT_EQ( m.getElement( 3, 6 ), 0 ); + + EXPECT_EQ( m.getElement( 4, 0 ), 0 ); + EXPECT_EQ( m.getElement( 4, 1 ), 0 ); + EXPECT_EQ( m.getElement( 4, 2 ), 0 ); + EXPECT_EQ( m.getElement( 4, 3 ), 15 ); + EXPECT_EQ( m.getElement( 4, 4 ), 16 ); + EXPECT_EQ( m.getElement( 4, 5 ), 0 ); + EXPECT_EQ( m.getElement( 4, 6 ), 17 ); +} + +template< typename Matrix > +void test_AddRow() +{ + using RealType = typename Matrix::RealType; + using DeviceType = typename Matrix::DeviceType; + using IndexType = typename Matrix::IndexType; + using DiagonalsShiftsType = typename Matrix::DiagonalsShiftsType; + + /* + * Sets up the following 6x5 matrix: + * + * / 1 2 3 0 0 \ + * | 0 7 8 9 0 | + * | 0 0 13 14 15 | + * | 0 0 0 19 20 | + * | 0 0 0 0 25 | + * \ 0 0 0 0 0 / + */ + + const IndexType rows = 6; + const IndexType cols = 5; + DiagonalsShiftsType diagonals( { -2, 0, 1, 2 } ); + + Matrix m( rows, cols, diagonals ); + + RealType value = 1; + for( IndexType i = 0; i < rows; i++ ) + for( IndexType j = 0; j < cols; j++ ) + { + IndexType offset = j - i; + if( diagonals.containsValue( offset ) && offset >= 0) + m.setElement( i, j, value ); + value++; + } + + // Check the added elements + EXPECT_EQ( m.getElement( 0, 0 ), 1 ); + EXPECT_EQ( m.getElement( 0, 1 ), 2 ); + EXPECT_EQ( m.getElement( 0, 2 ), 3 ); + EXPECT_EQ( m.getElement( 0, 3 ), 0 ); + EXPECT_EQ( m.getElement( 0, 4 ), 0 ); + + EXPECT_EQ( m.getElement( 1, 0 ), 0 ); + EXPECT_EQ( m.getElement( 1, 1 ), 7 ); + EXPECT_EQ( m.getElement( 1, 2 ), 8 ); + EXPECT_EQ( m.getElement( 1, 3 ), 9 ); + EXPECT_EQ( m.getElement( 1, 4 ), 0 ); + + EXPECT_EQ( m.getElement( 2, 0 ), 0 ); + EXPECT_EQ( m.getElement( 2, 1 ), 0 ); + EXPECT_EQ( m.getElement( 2, 2 ), 13 ); + EXPECT_EQ( m.getElement( 2, 3 ), 14 ); + EXPECT_EQ( m.getElement( 2, 4 ), 15 ); + + EXPECT_EQ( m.getElement( 3, 0 ), 0 ); + EXPECT_EQ( m.getElement( 3, 1 ), 0 ); + EXPECT_EQ( m.getElement( 3, 2 ), 0 ); + EXPECT_EQ( m.getElement( 3, 3 ), 19 ); + EXPECT_EQ( m.getElement( 3, 4 ), 20 ); + + EXPECT_EQ( m.getElement( 4, 0 ), 0 ); + EXPECT_EQ( m.getElement( 4, 1 ), 0 ); + EXPECT_EQ( m.getElement( 4, 2 ), 0 ); + EXPECT_EQ( m.getElement( 4, 3 ), 0 ); + EXPECT_EQ( m.getElement( 4, 4 ), 25 ); + + EXPECT_EQ( m.getElement( 5, 0 ), 0 ); + EXPECT_EQ( m.getElement( 5, 1 ), 0 ); + EXPECT_EQ( m.getElement( 5, 2 ), 0 ); + EXPECT_EQ( m.getElement( 5, 3 ), 0 ); + 
EXPECT_EQ( m.getElement( 5, 4 ), 0 ); + + // Add new elements to the old elements with a multiplying factor applied to the old elements. + /* + * The following setup results in the following 6x5 sparse matrix: + * + * / 0 0 0 0 0 0 \ / 1 2 3 0 0 \ / 11 0 0 0 0 \ / 11 0 0 0 0 \ + * | 0 1 0 0 0 0 | | 0 7 8 9 0 | | 0 22 0 0 0 | | 0 29 8 9 0 | + * | 0 0 2 0 0 0 | * | 0 0 13 14 15 | + | 33 0 33 0 0 | = | 33 0 59 28 30 | + * | 0 0 0 3 0 0 | | 0 0 0 19 20 | | 0 44 0 44 0 | | 0 44 0 101 60 | + * | 0 0 0 0 4 0 | | 0 0 0 0 25 | | 0 0 55 0 55 | | 0 0 55 0 155 | + * \ 0 0 0 0 0 5 / \ 0 0 0 0 0 / \ 0 0 0 66 0 / \ 0 0 0 66 0 / + */ + + auto matrix_view = m.getView(); + auto f = [=] __cuda_callable__ ( IndexType rowIdx ) mutable { + RealType values[ 6 ][ 4 ] { + { 0, 11, 0, 0 }, + { 0, 22, 0, 0 }, + { 33, 33, 0, 0 }, + { 44, 44, 0, 0 }, + { 55, 55, 0, 0 }, + { 66, 0, 0, 0 } }; + auto row = matrix_view.getRow( rowIdx ); + for( IndexType i = 0; i < 4; i++ ) + { + RealType& val = row.getValue( i ); + val = rowIdx * val + values[ rowIdx ][ i ]; + } + }; + TNL::Algorithms::ParallelFor< DeviceType >::exec( 0, 6, f ); + + EXPECT_EQ( m.getElement( 0, 0 ), 11 ); + EXPECT_EQ( m.getElement( 0, 1 ), 0 ); + EXPECT_EQ( m.getElement( 0, 2 ), 0 ); + EXPECT_EQ( m.getElement( 0, 3 ), 0 ); + EXPECT_EQ( m.getElement( 0, 4 ), 0 ); + + EXPECT_EQ( m.getElement( 1, 0 ), 0 ); + EXPECT_EQ( m.getElement( 1, 1 ), 29 ); + EXPECT_EQ( m.getElement( 1, 2 ), 8 ); + EXPECT_EQ( m.getElement( 1, 3 ), 9 ); + EXPECT_EQ( m.getElement( 1, 4 ), 0 ); + + EXPECT_EQ( m.getElement( 2, 0 ), 33 ); + EXPECT_EQ( m.getElement( 2, 1 ), 0 ); + EXPECT_EQ( m.getElement( 2, 2 ), 59 ); + EXPECT_EQ( m.getElement( 2, 3 ), 28 ); + EXPECT_EQ( m.getElement( 2, 4 ), 30 ); + + EXPECT_EQ( m.getElement( 3, 0 ), 0 ); + EXPECT_EQ( m.getElement( 3, 1 ), 44 ); + EXPECT_EQ( m.getElement( 3, 2 ), 0 ); + EXPECT_EQ( m.getElement( 3, 3 ), 101 ); + EXPECT_EQ( m.getElement( 3, 4 ), 60 ); + + EXPECT_EQ( m.getElement( 4, 0 ), 0 ); + EXPECT_EQ( m.getElement( 4, 1 ), 0 ); + EXPECT_EQ( m.getElement( 4, 2 ), 55 ); + EXPECT_EQ( m.getElement( 4, 3 ), 0 ); + EXPECT_EQ( m.getElement( 4, 4 ), 155 ); + + EXPECT_EQ( m.getElement( 5, 0 ), 0 ); + EXPECT_EQ( m.getElement( 5, 1 ), 0 ); + EXPECT_EQ( m.getElement( 5, 2 ), 0 ); + EXPECT_EQ( m.getElement( 5, 3 ), 66 ); + EXPECT_EQ( m.getElement( 5, 4 ), 0 ); +} + +template< typename Matrix > +void test_VectorProduct() +{ + using RealType = typename Matrix::RealType; + using DeviceType = typename Matrix::DeviceType; + using IndexType = typename Matrix::IndexType; + using DiagonalsShiftsType = typename Matrix::DiagonalsShiftsType; + + /* + * Sets up the following 5x4 matrix: + * + * / 1 0 3 0 \ + * | 0 6 0 8 | + * | 9 0 11 0 | + * | 0 14 0 16 | + * \ 0 0 19 0 / + */ + const IndexType rows = 5; + const IndexType cols = 4; + DiagonalsShiftsType diagonals{ -2, 0, 2 }; + + Matrix m( rows, cols, diagonals ); + + RealType value = 1; + for( IndexType i = 0; i < rows; i++ ) + for( IndexType j = 0; j < cols; j++) + { + if( diagonals.containsValue( j - i ) ) + m.setElement( i, j, value ); + value++; + } + + using VectorType = TNL::Containers::Vector< RealType, DeviceType, IndexType >; + + VectorType inVector( 4 ); + inVector = 2; + + VectorType outVector( 5 ); + outVector = 0; + + m.vectorProduct( inVector, outVector); + + EXPECT_EQ( outVector.getElement( 0 ), 8 ); + EXPECT_EQ( outVector.getElement( 1 ), 28 ); + EXPECT_EQ( outVector.getElement( 2 ), 40 ); + EXPECT_EQ( outVector.getElement( 3 ), 60 ); + EXPECT_EQ( outVector.getElement( 4 ), 38 ); +} + +template< 
typename Matrix1, typename Matrix2 = Matrix1 > +void test_AddMatrix() +{ + using RealType = typename Matrix1::RealType; + using DeviceType = typename Matrix1::DeviceType; + using IndexType = typename Matrix1::IndexType; + using DiagonalsShiftsType1 = typename Matrix1::DiagonalsShiftsType; + using DiagonalsShiftsType2 = typename Matrix2::DiagonalsShiftsType; + + /* + * Sets up the following 5x4 matrix: + * + * / 1 2 0 0 \ + * | 5 6 7 0 | + * | 0 10 11 12 | + * | 0 0 15 16 | + * \ 0 0 0 20 / + */ + const IndexType rows = 5; + const IndexType cols = 4; + DiagonalsShiftsType1 diagonals1; + DiagonalsShiftsType2 diagonals2; + + Matrix1 m( rows, cols, diagonals1 ); + + RealType value = 1; + for( IndexType i = 0; i < rows; i++ ) + for( IndexType j = 0; j < cols; j++) + { + if( abs( i - j ) <= 1 ) + m.setElement( i, j, value ); + value++; + } + + /* + * Sets up the following 5x4 matrix: + * + * / 1 2 0 0 \ + * | 3 4 5 0 | + * | 0 6 7 8 | + * | 0 0 9 10 | + * \ 0 0 0 11 / + */ + Matrix2 m2( rows, cols, diagonals2 ); + + RealType newValue = 1; + for( IndexType i = 0; i < rows; i++ ) + for( IndexType j = 0; j < cols; j++) + if( abs( i - j ) <= 1 ) + m2.setElement( i, j, newValue++ ); + + /* + * Compute the following 5x4 matrix: + * + * / 1 2 0 0 \ / 1 2 0 0 \ / 3 6 0 0 \ + * | 5 6 7 0 | | 3 4 5 0 | | 11 14 17 0 | + * | 0 10 11 12 | + 2 * | 0 6 7 8 | = | 0 22 25 28 | + * | 0 0 15 16 | | 0 0 9 10 | | 0 0 33 36 | + * \ 0 0 0 20 / \ 0 0 0 11 / \ 0 0 0 42 / + */ + + Matrix1 mResult; + mResult.reset(); + mResult.setDimensions( rows, cols ); + + mResult = m; + + RealType matrixMultiplicator = 2; + RealType thisMatrixMultiplicator = 1; + + mResult.addMatrix( m2, matrixMultiplicator, thisMatrixMultiplicator ); + + EXPECT_EQ( mResult.getElement( 0, 0 ), matrixMultiplicator * m2.getElement( 0, 0 ) + thisMatrixMultiplicator * m.getElement( 0, 0 ) ); + EXPECT_EQ( mResult.getElement( 0, 1 ), matrixMultiplicator * m2.getElement( 0, 1 ) + thisMatrixMultiplicator * m.getElement( 0, 1 ) ); + EXPECT_EQ( mResult.getElement( 0, 2 ), matrixMultiplicator * m2.getElement( 0, 2 ) + thisMatrixMultiplicator * m.getElement( 0, 2 ) ); + EXPECT_EQ( mResult.getElement( 0, 3 ), matrixMultiplicator * m2.getElement( 0, 3 ) + thisMatrixMultiplicator * m.getElement( 0, 3 ) ); + + EXPECT_EQ( mResult.getElement( 1, 0 ), matrixMultiplicator * m2.getElement( 1, 0 ) + thisMatrixMultiplicator * m.getElement( 1, 0 ) ); + EXPECT_EQ( mResult.getElement( 1, 1 ), matrixMultiplicator * m2.getElement( 1, 1 ) + thisMatrixMultiplicator * m.getElement( 1, 1 ) ); + EXPECT_EQ( mResult.getElement( 1, 2 ), matrixMultiplicator * m2.getElement( 1, 2 ) + thisMatrixMultiplicator * m.getElement( 1, 2 ) ); + EXPECT_EQ( mResult.getElement( 1, 3 ), matrixMultiplicator * m2.getElement( 1, 3 ) + thisMatrixMultiplicator * m.getElement( 1, 3 ) ); + + EXPECT_EQ( mResult.getElement( 2, 0 ), matrixMultiplicator * m2.getElement( 2, 0 ) + thisMatrixMultiplicator * m.getElement( 2, 0 ) ); + EXPECT_EQ( mResult.getElement( 2, 1 ), matrixMultiplicator * m2.getElement( 2, 1 ) + thisMatrixMultiplicator * m.getElement( 2, 1 ) ); + EXPECT_EQ( mResult.getElement( 2, 2 ), matrixMultiplicator * m2.getElement( 2, 2 ) + thisMatrixMultiplicator * m.getElement( 2, 2 ) ); + EXPECT_EQ( mResult.getElement( 2, 3 ), matrixMultiplicator * m2.getElement( 2, 3 ) + thisMatrixMultiplicator * m.getElement( 2, 3 ) ); + + EXPECT_EQ( mResult.getElement( 3, 0 ), matrixMultiplicator * m2.getElement( 3, 0 ) + thisMatrixMultiplicator * m.getElement( 3, 0 ) ); + EXPECT_EQ( mResult.getElement( 3, 1 ), 
matrixMultiplicator * m2.getElement( 3, 1 ) + thisMatrixMultiplicator * m.getElement( 3, 1 ) ); + EXPECT_EQ( mResult.getElement( 3, 2 ), matrixMultiplicator * m2.getElement( 3, 2 ) + thisMatrixMultiplicator * m.getElement( 3, 2 ) ); + EXPECT_EQ( mResult.getElement( 3, 3 ), matrixMultiplicator * m2.getElement( 3, 3 ) + thisMatrixMultiplicator * m.getElement( 3, 3 ) ); + + EXPECT_EQ( mResult.getElement( 4, 0 ), matrixMultiplicator * m2.getElement( 4, 0 ) + thisMatrixMultiplicator * m.getElement( 4, 0 ) ); + EXPECT_EQ( mResult.getElement( 4, 1 ), matrixMultiplicator * m2.getElement( 4, 1 ) + thisMatrixMultiplicator * m.getElement( 4, 1 ) ); + EXPECT_EQ( mResult.getElement( 4, 2 ), matrixMultiplicator * m2.getElement( 4, 2 ) + thisMatrixMultiplicator * m.getElement( 4, 2 ) ); + EXPECT_EQ( mResult.getElement( 4, 3 ), matrixMultiplicator * m2.getElement( 4, 3 ) + thisMatrixMultiplicator * m.getElement( 4, 3 ) ); + + EXPECT_EQ( mResult.getElement( 0, 0 ), 3 ); + EXPECT_EQ( mResult.getElement( 0, 1 ), 6 ); + EXPECT_EQ( mResult.getElement( 0, 2 ), 0 ); + EXPECT_EQ( mResult.getElement( 0, 3 ), 0 ); + + EXPECT_EQ( mResult.getElement( 1, 0 ), 11 ); + EXPECT_EQ( mResult.getElement( 1, 1 ), 14 ); + EXPECT_EQ( mResult.getElement( 1, 2 ), 17 ); + EXPECT_EQ( mResult.getElement( 1, 3 ), 0 ); + + EXPECT_EQ( mResult.getElement( 2, 0 ), 0 ); + EXPECT_EQ( mResult.getElement( 2, 1 ), 22 ); + EXPECT_EQ( mResult.getElement( 2, 2 ), 25 ); + EXPECT_EQ( mResult.getElement( 2, 3 ), 28 ); + + EXPECT_EQ( mResult.getElement( 3, 0 ), 0 ); + EXPECT_EQ( mResult.getElement( 3, 1 ), 0 ); + EXPECT_EQ( mResult.getElement( 3, 2 ), 33 ); + EXPECT_EQ( mResult.getElement( 3, 3 ), 36 ); + + EXPECT_EQ( mResult.getElement( 4, 0 ), 0 ); + EXPECT_EQ( mResult.getElement( 4, 1 ), 0 ); + EXPECT_EQ( mResult.getElement( 4, 2 ), 0 ); + EXPECT_EQ( mResult.getElement( 4, 3 ), 42 ); +} + +template< typename Matrix > +void test_GetMatrixProduct() +{ + using RealType = typename Matrix::RealType; + using DeviceType = typename Matrix::DeviceType; + using IndexType = typename Matrix::IndexType; + using DiagonalsShiftsType = typename Matrix::DiagonalsShiftsType; +/* + * Sets up the following 5x4 matrix: + * + * / 1 2 3 4 \ + * | 5 6 7 8 | + * | 9 10 11 12 | + * | 13 14 15 16 | + * \ 17 18 19 20 / + */ + const IndexType leftRows = 5; + const IndexType leftCols = 4; + DiagonalsShiftsType diagonalsShifts( { 0, 1, 2 } ); + + Matrix leftMatrix( leftRows, leftCols, diagonalsShifts ); + + RealType value = 1; + for( IndexType i = 0; i < leftRows; i++ ) + for( IndexType j = 0; j < leftCols; j++) + leftMatrix.setElement( i, j, value++ ); + +/* + * Sets up the following 4x5 matrix: + * + * / 1 2 3 4 5 \ + * | 6 7 8 9 10 | + * | 11 12 13 14 15 | + * \ 16 17 18 19 20 / + */ + const IndexType rightRows = 4; + const IndexType rightCols = 5; + + Matrix rightMatrix; + rightMatrix.reset(); + rightMatrix.setDimensions( rightRows, rightCols ); + + RealType newValue = 1; + for( IndexType i = 0; i < rightRows; i++ ) + for( IndexType j = 0; j < rightCols; j++) + rightMatrix.setElement( i, j, newValue++ ); + +/* + * Sets up the following 5x5 resulting matrix: + * + * / 0 0 0 0 \ + * | 0 0 0 0 | + * | 0 0 0 0 | + * | 0 0 0 0 | + * \ 0 0 0 0 / + */ + + Matrix mResult( leftRows, rightCols, diagonalsShifts ); + mResult.setValue( 0 ); + + RealType leftMatrixMultiplicator = 1; + RealType rightMatrixMultiplicator = 2; +/* + * / 1 2 3 4 \ / 220 240 260 280 300 \ + * | 5 6 7 8 | / 1 2 3 4 5 \ | 492 544 596 648 700 | + * 1 * | 9 10 11 12 | * 2 * | 6 7 8 9 10 | = | 764 848 932 1016 
1100 | + * | 13 14 15 16 | | 11 12 13 14 15 | | 1036 1152 1268 1384 1500 | + * \ 17 18 19 20 / \ 16 17 18 19 20 / \ 1308 1456 1604 1752 1900 / + */ + + mResult.getMatrixProduct( leftMatrix, rightMatrix, leftMatrixMultiplicator, rightMatrixMultiplicator ); + + EXPECT_EQ( mResult.getElement( 0, 0 ), 220 ); + EXPECT_EQ( mResult.getElement( 0, 1 ), 240 ); + EXPECT_EQ( mResult.getElement( 0, 2 ), 260 ); + EXPECT_EQ( mResult.getElement( 0, 3 ), 280 ); + EXPECT_EQ( mResult.getElement( 0, 4 ), 300 ); + + EXPECT_EQ( mResult.getElement( 1, 0 ), 492 ); + EXPECT_EQ( mResult.getElement( 1, 1 ), 544 ); + EXPECT_EQ( mResult.getElement( 1, 2 ), 596 ); + EXPECT_EQ( mResult.getElement( 1, 3 ), 648 ); + EXPECT_EQ( mResult.getElement( 1, 4 ), 700 ); + + EXPECT_EQ( mResult.getElement( 2, 0 ), 764 ); + EXPECT_EQ( mResult.getElement( 2, 1 ), 848 ); + EXPECT_EQ( mResult.getElement( 2, 2 ), 932 ); + EXPECT_EQ( mResult.getElement( 2, 3 ), 1016 ); + EXPECT_EQ( mResult.getElement( 2, 4 ), 1100 ); + + EXPECT_EQ( mResult.getElement( 3, 0 ), 1036 ); + EXPECT_EQ( mResult.getElement( 3, 1 ), 1152 ); + EXPECT_EQ( mResult.getElement( 3, 2 ), 1268 ); + EXPECT_EQ( mResult.getElement( 3, 3 ), 1384 ); + EXPECT_EQ( mResult.getElement( 3, 4 ), 1500 ); + + EXPECT_EQ( mResult.getElement( 4, 0 ), 1308 ); + EXPECT_EQ( mResult.getElement( 4, 1 ), 1456 ); + EXPECT_EQ( mResult.getElement( 4, 2 ), 1604 ); + EXPECT_EQ( mResult.getElement( 4, 3 ), 1752 ); + EXPECT_EQ( mResult.getElement( 4, 4 ), 1900 ); +} + +template< typename Matrix > +void test_GetTransposition() +{ + using RealType = typename Matrix::RealType; + using DeviceType = typename Matrix::DeviceType; + using IndexType = typename Matrix::IndexType; + using DiagonalsShiftsType = typename Matrix::DiagonalsShiftsType; +/* + * Sets up the following 3x2 matrix: + * + * / 1 2 \ + * | 3 4 | + * \ 5 6 / + */ + const IndexType rows = 3; + const IndexType cols = 2; + DiagonalsShiftsType diagonalsShifts( { 0, 1, 2 } ); + + Matrix m( rows, cols, diagonalsShifts ); + + RealType value = 1; + for( IndexType i = 0; i < rows; i++ ) + for( IndexType j = 0; j < cols; j++ ) + m.setElement( i, j, value++ ); + + m.print( std::cout ); + +/* + * Sets up the following 2x3 matrix: + * + * / 0 0 0 \ + * \ 0 0 0 / + */ + Matrix mTransposed( cols, rows, diagonalsShifts ); + + mTransposed.print( std::cout ); + + RealType matrixMultiplicator = 1; + + mTransposed.getTransposition( m, matrixMultiplicator ); + + mTransposed.print( std::cout ); + +/* + * Should result in the following 2x3 matrix: + * + * / 1 3 5 \ + * \ 2 4 6 / + */ + + EXPECT_EQ( mTransposed.getElement( 0, 0 ), 1 ); + EXPECT_EQ( mTransposed.getElement( 0, 1 ), 3 ); + EXPECT_EQ( mTransposed.getElement( 0, 2 ), 5 ); + + EXPECT_EQ( mTransposed.getElement( 1, 0 ), 2 ); + EXPECT_EQ( mTransposed.getElement( 1, 1 ), 4 ); + EXPECT_EQ( mTransposed.getElement( 1, 2 ), 6 ); +} + + +template< typename Matrix > +void test_PerformSORIteration() +{ + using RealType = typename Matrix::RealType; + using DeviceType = typename Matrix::DeviceType; + using IndexType = typename Matrix::IndexType; + using DiagonalsShiftsType = typename Matrix::DiagonalsShiftsType; +/* + * Sets up the following 4x4 matrix: + * + * / 4 1 1 1 \ + * | 1 4 1 1 | + * | 1 1 4 1 | + * \ 1 1 1 4 / + */ + const IndexType rows = 4; + const IndexType cols = 4; + DiagonalsShiftsType diagonalsShifts( { 0, 1, 2 } ); + + Matrix m( rows, cols, diagonalsShifts ); + + m.setElement( 0, 0, 4.0 ); // 0th row + m.setElement( 0, 1, 1.0 ); + m.setElement( 0, 2, 1.0 ); + m.setElement( 0, 3, 1.0 ); + + 
m.setElement( 1, 0, 1.0 ); // 1st row + m.setElement( 1, 1, 4.0 ); + m.setElement( 1, 2, 1.0 ); + m.setElement( 1, 3, 1.0 ); + + m.setElement( 2, 0, 1.0 ); + m.setElement( 2, 1, 1.0 ); // 2nd row + m.setElement( 2, 2, 4.0 ); + m.setElement( 2, 3, 1.0 ); + + m.setElement( 3, 0, 1.0 ); // 3rd row + m.setElement( 3, 1, 1.0 ); + m.setElement( 3, 2, 1.0 ); + m.setElement( 3, 3, 4.0 ); + + RealType bVector [ 4 ] = { 1.0, 1.0, 1.0, 1.0 }; + RealType xVector [ 4 ] = { 1.0, 1.0, 1.0, 1.0 }; + + IndexType row = 0; + RealType omega = 1; + + m.performSORIteration( bVector, row++, xVector, omega); + + EXPECT_EQ( xVector[ 0 ], -0.5 ); + EXPECT_EQ( xVector[ 1 ], 1.0 ); + EXPECT_EQ( xVector[ 2 ], 1.0 ); + EXPECT_EQ( xVector[ 3 ], 1.0 ); + + m.performSORIteration( bVector, row++, xVector, omega); + + EXPECT_EQ( xVector[ 0 ], -0.5 ); + EXPECT_EQ( xVector[ 1 ], -0.125 ); + EXPECT_EQ( xVector[ 2 ], 1.0 ); + EXPECT_EQ( xVector[ 3 ], 1.0 ); + + m.performSORIteration( bVector, row++, xVector, omega); + + EXPECT_EQ( xVector[ 0 ], -0.5 ); + EXPECT_EQ( xVector[ 1 ], -0.125 ); + EXPECT_EQ( xVector[ 2 ], 0.15625 ); + EXPECT_EQ( xVector[ 3 ], 1.0 ); + + m.performSORIteration( bVector, row++, xVector, omega); + + EXPECT_EQ( xVector[ 0 ], -0.5 ); + EXPECT_EQ( xVector[ 1 ], -0.125 ); + EXPECT_EQ( xVector[ 2 ], 0.15625 ); + EXPECT_EQ( xVector[ 3 ], 0.3671875 ); +} + +template< typename Matrix > +void test_AssignmentOperator() +{ + using RealType = typename Matrix::RealType; + using DeviceType = typename Matrix::DeviceType; + using IndexType = typename Matrix::IndexType; + using DiagonalsShiftsType = typename Matrix::DiagonalsShiftsType; + constexpr bool rowMajorOrder = Matrix::getRowMajorOrder(); + + using MultidiagonalHost = TNL::Matrices::Multidiagonal< RealType, TNL::Devices::Host, IndexType, rowMajorOrder >; + using MultidiagonalCuda = TNL::Matrices::Multidiagonal< RealType, TNL::Devices::Cuda, IndexType, !rowMajorOrder >; + + const IndexType rows( 10 ), columns( 10 ); + DiagonalsShiftsType diagonalsShifts( { -4, -2, 0, 2, 3, 5 } ); + MultidiagonalHost hostMatrix( rows, columns, diagonalsShifts ); + for( IndexType i = 0; i < rows; i++ ) + for( IndexType j = 0; j < columns; j++ ) + if( diagonalsShifts.containsValue( j - i ) ) + hostMatrix.setElement( i, j, i + j ); + + Matrix matrix( rows, columns, diagonalsShifts ); + matrix.getValues() = 0.0; + matrix = hostMatrix; + for( IndexType i = 0; i < columns; i++ ) + for( IndexType j = 0; j < rows; j++ ) + if( diagonalsShifts.containsValue( j - i ) ) + EXPECT_EQ( matrix.getElement( i, j ), i + j ); + else + EXPECT_EQ( matrix.getElement( i, j ), 0.0 ); + +#ifdef HAVE_CUDA + MultidiagonalCuda cudaMatrix( rows, columns, diagonalsShifts ); + for( IndexType i = 0; i < rows; i++ ) + for( IndexType j = 0; j < columns; j++ ) + if( diagonalsShifts.containsValue( j - i ) ) + cudaMatrix.setElement( i, j, i + j ); + + matrix.getValues() = 0.0; + matrix = cudaMatrix; + for( IndexType i = 0; i < rows; i++ ) + for( IndexType j = 0; j < columns; j++ ) + { + if( diagonalsShifts.containsValue( j - i ) ) + EXPECT_EQ( matrix.getElement( i, j ), i + j ); + else + EXPECT_EQ( matrix.getElement( i, j ), 0.0 ); + } +#endif +} + + +template< typename Matrix > +void test_SaveAndLoad() +{ + using RealType = typename Matrix::RealType; + using DeviceType = typename Matrix::DeviceType; + using IndexType = typename Matrix::IndexType; + using DiagonalsShiftsType = typename Matrix::DiagonalsShiftsType; + + /* + * Sets up the following 4x4 matrix: + * + * / 1 2 0 0 \ + * | 5 6 7 0 | + * | 0 10 11 12 | + * \ 0 
0 15 16 / + */ + const IndexType rows = 4; + const IndexType cols = 4; + DiagonalsShiftsType diagonalsShifts( { -1, 0, 1 } ); + + Matrix savedMatrix( rows, cols, diagonalsShifts ); + + RealType value = 1; + for( IndexType i = 0; i < rows; i++ ) + for( IndexType j = 0; j < cols; j++ ) + { + if( diagonalsShifts.containsValue( j - i ) ) + savedMatrix.setElement( i, j, value ); + value++; + } + + ASSERT_NO_THROW( savedMatrix.save( TEST_FILE_NAME ) ); + + Matrix loadedMatrix; + + ASSERT_NO_THROW( loadedMatrix.load( TEST_FILE_NAME ) ); + + EXPECT_EQ( savedMatrix.getElement( 0, 0 ), loadedMatrix.getElement( 0, 0 ) ); + EXPECT_EQ( savedMatrix.getElement( 0, 1 ), loadedMatrix.getElement( 0, 1 ) ); + EXPECT_EQ( savedMatrix.getElement( 0, 2 ), loadedMatrix.getElement( 0, 2 ) ); + EXPECT_EQ( savedMatrix.getElement( 0, 3 ), loadedMatrix.getElement( 0, 3 ) ); + + EXPECT_EQ( savedMatrix.getElement( 1, 0 ), loadedMatrix.getElement( 1, 0 ) ); + EXPECT_EQ( savedMatrix.getElement( 1, 1 ), loadedMatrix.getElement( 1, 1 ) ); + EXPECT_EQ( savedMatrix.getElement( 1, 2 ), loadedMatrix.getElement( 1, 2 ) ); + EXPECT_EQ( savedMatrix.getElement( 1, 3 ), loadedMatrix.getElement( 1, 3 ) ); + + EXPECT_EQ( savedMatrix.getElement( 2, 0 ), loadedMatrix.getElement( 2, 0 ) ); + EXPECT_EQ( savedMatrix.getElement( 2, 1 ), loadedMatrix.getElement( 2, 1 ) ); + EXPECT_EQ( savedMatrix.getElement( 2, 2 ), loadedMatrix.getElement( 2, 2 ) ); + EXPECT_EQ( savedMatrix.getElement( 2, 3 ), loadedMatrix.getElement( 2, 3 ) ); + + EXPECT_EQ( savedMatrix.getElement( 3, 0 ), loadedMatrix.getElement( 3, 0 ) ); + EXPECT_EQ( savedMatrix.getElement( 3, 1 ), loadedMatrix.getElement( 3, 1 ) ); + EXPECT_EQ( savedMatrix.getElement( 3, 2 ), loadedMatrix.getElement( 3, 2 ) ); + EXPECT_EQ( savedMatrix.getElement( 3, 3 ), loadedMatrix.getElement( 3, 3 ) ); + + EXPECT_EQ( savedMatrix.getElement( 0, 0 ), 1 ); + EXPECT_EQ( savedMatrix.getElement( 0, 1 ), 2 ); + EXPECT_EQ( savedMatrix.getElement( 0, 2 ), 0 ); + EXPECT_EQ( savedMatrix.getElement( 0, 3 ), 0 ); + + EXPECT_EQ( savedMatrix.getElement( 1, 0 ), 5 ); + EXPECT_EQ( savedMatrix.getElement( 1, 1 ), 6 ); + EXPECT_EQ( savedMatrix.getElement( 1, 2 ), 7 ); + EXPECT_EQ( savedMatrix.getElement( 1, 3 ), 0 ); + + EXPECT_EQ( savedMatrix.getElement( 2, 0 ), 0 ); + EXPECT_EQ( savedMatrix.getElement( 2, 1 ), 10 ); + EXPECT_EQ( savedMatrix.getElement( 2, 2 ), 11 ); + EXPECT_EQ( savedMatrix.getElement( 2, 3 ), 12 ); + + EXPECT_EQ( savedMatrix.getElement( 3, 0 ), 0 ); + EXPECT_EQ( savedMatrix.getElement( 3, 1 ), 0 ); + EXPECT_EQ( savedMatrix.getElement( 3, 2 ), 15 ); + EXPECT_EQ( savedMatrix.getElement( 3, 3 ), 16 ); +} + +template< typename Matrix > +void test_Print() +{ + using RealType = typename Matrix::RealType; + using DeviceType = typename Matrix::DeviceType; + using IndexType = typename Matrix::IndexType; + using DiagonalsShiftsType = typename Matrix::DiagonalsShiftsType; + + /* + * Sets up the following 5x4 sparse matrix: + * + * / 1 2 0 0 \ + * | 5 6 7 0 | + * | 0 10 11 12 | + * | 0 0 15 16 | + * \ 0 0 0 20 / + */ + const IndexType rows = 5; + const IndexType cols = 4; + DiagonalsShiftsType diagonalsShifts( { -1, 0, 1 } ); + + Matrix m( rows, cols, diagonalsShifts ); + + RealType value = 1; + for( IndexType i = 0; i < rows; i++) + for( IndexType j = 0; j < cols; j++) + { + if( abs( i - j ) <= 1 ) + m.setElement( i, j, value ); + value++; + } + + std::stringstream printed; + std::stringstream couted; + + //change the underlying buffer and save the old buffer + auto old_buf = std::cout.rdbuf(printed.rdbuf()); 
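+   // note (reviewer suggestion, not part of the original test): assuming print() accepts any
+   // std::ostream&, calling m.print( printed ) directly would avoid swapping the global
+   // std::cout buffer; the rdbuf swap above is undone again right after printing.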
+ + m.print( std::cout ); //all the std::cout goes to ss + + std::cout.rdbuf(old_buf); //reset + couted << "Row: 0 -> Col:0->1\t Col:1->2\t\n" + "Row: 1 -> Col:0->5\t Col:1->6\t Col:2->7\t\n" + "Row: 2 -> Col:1->10\t Col:2->11\t Col:3->12\t\n" + "Row: 3 -> Col:2->15\t Col:3->16\t\n" + "Row: 4 -> Col:3->20\t\n"; + + EXPECT_EQ( printed.str(), couted.str() ); +} + +// test fixture for typed tests +template< typename Matrix > +class MatrixTest : public ::testing::Test +{ +protected: + using MatrixType = Matrix; +}; + +// types for which MatrixTest is instantiated +using MatrixTypes = ::testing::Types +< + TNL::Matrices::Multidiagonal< int, TNL::Devices::Host, short >, + TNL::Matrices::Multidiagonal< long, TNL::Devices::Host, short >, + TNL::Matrices::Multidiagonal< float, TNL::Devices::Host, short >, + TNL::Matrices::Multidiagonal< double, TNL::Devices::Host, short >, + TNL::Matrices::Multidiagonal< int, TNL::Devices::Host, int >, + TNL::Matrices::Multidiagonal< long, TNL::Devices::Host, int >, + TNL::Matrices::Multidiagonal< float, TNL::Devices::Host, int >, + TNL::Matrices::Multidiagonal< double, TNL::Devices::Host, int >, + TNL::Matrices::Multidiagonal< int, TNL::Devices::Host, long >, + TNL::Matrices::Multidiagonal< long, TNL::Devices::Host, long >, + TNL::Matrices::Multidiagonal< float, TNL::Devices::Host, long >, + TNL::Matrices::Multidiagonal< double, TNL::Devices::Host, long > +#ifdef HAVE_CUDA + ,TNL::Matrices::Multidiagonal< int, TNL::Devices::Cuda, short >, + TNL::Matrices::Multidiagonal< long, TNL::Devices::Cuda, short >, + TNL::Matrices::Multidiagonal< float, TNL::Devices::Cuda, short >, + TNL::Matrices::Multidiagonal< double, TNL::Devices::Cuda, short >, + TNL::Matrices::Multidiagonal< int, TNL::Devices::Cuda, int >, + TNL::Matrices::Multidiagonal< long, TNL::Devices::Cuda, int >, + TNL::Matrices::Multidiagonal< float, TNL::Devices::Cuda, int >, + TNL::Matrices::Multidiagonal< double, TNL::Devices::Cuda, int >, + TNL::Matrices::Multidiagonal< int, TNL::Devices::Cuda, long >, + TNL::Matrices::Multidiagonal< long, TNL::Devices::Cuda, long >, + TNL::Matrices::Multidiagonal< float, TNL::Devices::Cuda, long >, + TNL::Matrices::Multidiagonal< double, TNL::Devices::Cuda, long > +#endif +>; + +TYPED_TEST_SUITE( MatrixTest, MatrixTypes ); + +TYPED_TEST( MatrixTest, getSerializationType ) +{ + test_GetSerializationType(); +} + +TYPED_TEST( MatrixTest, setDimensionsTest ) +{ + using MatrixType = typename TestFixture::MatrixType; + + test_SetDimensions< MatrixType >(); +} + +TYPED_TEST( MatrixTest, setLikeTest ) +{ + using MatrixType = typename TestFixture::MatrixType; + + test_SetLike< MatrixType, MatrixType >(); +} + +TYPED_TEST( MatrixTest, getNonemptyRowsCountTest ) +{ + using MatrixType = typename TestFixture::MatrixType; + + test_GetNonemptyRowsCount< MatrixType >(); +} + + +TYPED_TEST( MatrixTest, getCompressedRowLengthTest ) +{ + using MatrixType = typename TestFixture::MatrixType; + + test_GetCompressedRowLengths< MatrixType >(); +} + +TYPED_TEST( MatrixTest, getAllocatedElementsCountTest ) +{ + using MatrixType = typename TestFixture::MatrixType; + + test_GetAllocatedElementsCount< MatrixType >(); +} + +TYPED_TEST( MatrixTest, getNumberOfNonzeroMatrixElementsTest ) +{ + using MatrixType = typename TestFixture::MatrixType; + + test_GetNumberOfNonzeroMatrixElements< MatrixType >(); +} + +TYPED_TEST( MatrixTest, resetTest ) +{ + using MatrixType = typename TestFixture::MatrixType; + + test_Reset< MatrixType >(); +} + +TYPED_TEST( MatrixTest, setValueTest ) +{ + using MatrixType = 
typename TestFixture::MatrixType; + + test_SetValue< MatrixType >(); +} + +TYPED_TEST( MatrixTest, setElementTest ) +{ + using MatrixType = typename TestFixture::MatrixType; + + test_SetElement< MatrixType >(); +} + +TYPED_TEST( MatrixTest, addElementTest ) +{ + using MatrixType = typename TestFixture::MatrixType; + + test_AddElement< MatrixType >(); +} + +TYPED_TEST( MatrixTest, setRowTest ) +{ + using MatrixType = typename TestFixture::MatrixType; + + test_SetRow< MatrixType >(); +} + +TYPED_TEST( MatrixTest, addRowTest ) +{ + using MatrixType = typename TestFixture::MatrixType; + + test_AddRow< MatrixType >(); +} + +TYPED_TEST( MatrixTest, vectorProductTest ) +{ + using MatrixType = typename TestFixture::MatrixType; + + test_VectorProduct< MatrixType >(); +} + +/*TYPED_TEST( MatrixTest, addMatrixTest ) +{ + using MatrixType = typename TestFixture::MatrixType; + + test_AddMatrix< MatrixType >(); +}*/ + +TYPED_TEST( MatrixTest, assignmentOperatorTest ) +{ + using MatrixType = typename TestFixture::MatrixType; + + test_AssignmentOperator< MatrixType >(); +} + +TYPED_TEST( MatrixTest, saveAndLoadTest ) +{ + using MatrixType = typename TestFixture::MatrixType; + + test_SaveAndLoad< MatrixType >(); +} + +TYPED_TEST( MatrixTest, printTest ) +{ + using MatrixType = typename TestFixture::MatrixType; + + test_Print< MatrixType >(); +} + +/*TEST( MultidiagonalMatrixTest, Multidiagonal_getMatrixProductTest_Host ) +{ + bool testRan = false; + EXPECT_TRUE( testRan ); + std::cout << "\nTEST DID NOT RUN. NOT WORKING.\n\n"; + std::cout << "If launched on CPU, this test will not build, but will print the following message: \n"; + std::cout << " /home/lukas/tnl-dev/src/TNL/Matrices/Multidiagonal_impl.h(609): error: no instance of function template \"TNL::Matrices::MultidiagonalMatrixProductKernel\" matches the argument list\n"; + std::cout << " argument types are: (TNL::Matrices::Multidiagonal<int, TNL::Devices::Host, int> *, Multidiagonal_host_int *, Multidiagonal_host_int *, const int, const int, int, int)\n"; + std::cout << " detected during:\n"; + std::cout << " instantiation of \"void TNL::Matrices::Multidiagonal<Real, Device, Index>::getMatrixProduct(const Matrix1 &, const Matrix2 &, const TNL::Matrices::Multidiagonal<Real, Device, Index>::RealType &, const TNL::Matrices::Multidiagonal<Real, Device, Index>::RealType &) [with Real=int, Device=TNL::Devices::Host, Index=int, Matrix1=Multidiagonal_host_int, Matrix2=Multidiagonal_host_int, tileDim=32]\"\n"; + std::cout << " /home/lukas/tnl-dev/src/UnitTests/Matrices/MultidiagonalMatrixTest.h(901): here\n"; + std::cout << " instantiation of \"void test_GetMatrixProduct<Matrix>() [with Matrix=Multidiagonal_host_int]\"\n"; + std::cout << " /home/lukas/tnl-dev/src/UnitTests/Matrices/MultidiagonalMatrixTest.h(1315): here\n\n"; +} + +#ifdef HAVE_CUDA +TEST( MultidiagonalMatrixTest, Multidiagonal_getMatrixProductTest_Cuda ) +{ + bool testRan = false; + EXPECT_TRUE( testRan ); + std::cout << "\nTEST DID NOT RUN. 
NOT WORKING.\n\n"; + std::cout << "If launched on GPU, this test will not build, but will print the following message: \n"; + std::cout << " /home/lukas/tnl-dev/src/TNL/Matrices/Multidiagonal_impl.h(510): error: identifier \"tnlCudaMin\" is undefined\n"; + std::cout << " detected during:\n"; + std::cout << " instantiation of \"void TNL::Matrices::MultidiagonalMatrixProductKernel<Real,Index,Matrix1,Matrix2,tileDim,tileRowBlockSize>(TNL::Matrices::Multidiagonal<Real, TNL::Devices::Cuda, Index> *, const Matrix1 *, const Matrix2 *, Real, Real, Index, Index) [with Real=int, Index=int, Matrix1=Multidiagonal_cuda_int, Matrix2=Multidiagonal_cuda_int, tileDim=32, tileRowBlockSize=8]\"\n"; + std::cout << " instantiation of \"void TNL::Matrices::Multidiagonal<Real, Device, Index>::getMatrixProduct(const Matrix1 &, const Matrix2 &, const TNL::Matrices::Multidiagonal<Real, Device, Index>::RealType &, const TNL::Matrices::Multidiagonal<Real, Device, Index>::RealType &) [with Real=int, Device=TNL::Devices::Cuda, Index=int, Matrix1=Multidiagonal_cuda_int, Matrix2=Multidiagonal_cuda_int, tileDim=32]\"\n"; + std::cout << " /home/lukas/tnl-dev/src/UnitTests/Matrices/MultidiagonalMatrixTest.h(901): here\n"; + std::cout << " instantiation of \"void test_GetMatrixProduct<Matrix>() [with Matrix=Multidiagonal_cuda_int]\"\n"; + std::cout << " /home/lukas/tnl-dev/src/UnitTests/Matrices/MultidiagonalMatrixTest.h(1332): here\n\n"; +} +#endif + +TEST( MultidiagonalMatrixTest, Multidiagonal_getTranspositionTest_Host ) +{ +// test_GetTransposition< Multidiagonal_host_int >(); + bool testRan = false; + EXPECT_TRUE( testRan ); + std::cout << "\nTEST DID NOT RUN. NOT WORKING.\n\n"; + std::cout << "If launched on CPU, this test will not build, but will print the following message: \n"; + std::cout << " /home/lukas/tnl-dev/src/TNL/Matrices/Multidiagonal_impl.h(836): error: no instance of function template \"TNL::Matrices::MultidiagonalTranspositionAlignedKernel\" matches the argument list\n"; + std::cout << " argument types are: (TNL::Matrices::Multidiagonal<int, TNL::Devices::Host, int> *, Multidiagonal_host_int *, const int, int, int)\n"; + std::cout << " detected during:\n"; + std::cout << " instantiation of \"void TNL::Matrices::Multidiagonal<Real, Device, Index>::getTransposition(const Matrix &, const TNL::Matrices::Multidiagonal<Real, Device, Index>::RealType &) [with Real=int, Device=TNL::Devices::Host, Index=int, Matrix=Multidiagonal_host_int, tileDim=32]\"\n"; + std::cout << " /home/lukas/tnl-dev/src/UnitTests/Matrices/MultidiagonalMatrixTest.h(977): here\n"; + std::cout << " instantiation of \"void test_GetTransposition<Matrix>() [with Matrix=Multidiagonal_host_int]\"\n"; + std::cout << " /home/lukas/tnl-dev/src/UnitTests/Matrices/MultidiagonalMatrixTest.h(1420): here\n\n"; + std::cout << "AND this message: \n"; + std::cout << " /home/lukas/tnl-dev/src/TNL/Matrices/Multidiagonal_impl.h(852): error: no instance of function template \"TNL::Matrices::MultidiagonalTranspositionNonAlignedKernel\" matches the argument list\n"; + std::cout << " argument types are: (TNL::Matrices::Multidiagonal<int, TNL::Devices::Host, int> *, Multidiagonal_host_int *, const int, int, int)\n"; + std::cout << " detected during:\n"; + std::cout << " instantiation of \"void TNL::Matrices::Multidiagonal<Real, Device, Index>::getTransposition(const Matrix &, const TNL::Matrices::Multidiagonal<Real, Device, Index>::RealType &) [with Real=int, Device=TNL::Devices::Host, Index=int, Matrix=Multidiagonal_host_int, tileDim=32]\"\n"; + std::cout << " 
/home/lukas/tnl-dev/src/UnitTests/Matrices/MultidiagonalMatrixTest.h(977): here\n"; + std::cout << " instantiation of \"void test_GetTransposition<Matrix>() [with Matrix=Multidiagonal_host_int]\"\n"; + std::cout << " /home/lukas/tnl-dev/src/UnitTests/Matrices/MultidiagonalMatrixTest.h(1420): here\n\n"; +} + +#ifdef HAVE_CUDA +TEST( MultidiagonalMatrixTest, Multidiagonal_getTranspositionTest_Cuda ) +{ +// test_GetTransposition< Multidiagonal_cuda_int >(); + bool testRan = false; + EXPECT_TRUE( testRan ); + std::cout << "\nTEST DID NOT RUN. NOT WORKING.\n\n"; + std::cout << "If launched on GPU, this test throws the following message: \n"; + std::cout << " Assertion 'row >= 0 && row < this->getRows() && column >= 0 && column < this->getColumns()' failed !!!\n"; + std::cout << " File: /home/lukas/tnl-dev/src/TNL/Matrices/Multidiagonal_impl.h \n"; + std::cout << " Line: 329 \n"; + std::cout << " Diagnostics: Not supported with CUDA.\n"; + std::cout << " Assertion 'row >= 0 && row < this->getRows() && column >= 0 && column < this->getColumns()' failed !!! \n"; + std::cout << " File: /home/lukas/tnl-dev/src/TNL/Matrices/Multidiagonal_impl.h \n"; + std::cout << " Line: 329 \n"; + std::cout << " Diagnostics: Not supported with CUDA.\n"; + std::cout << " Assertion 'row >= 0 && row < this->getRows() && column >= 0 && column < this->getColumns()' failed !!! \n"; + std::cout << " File: /home/lukas/tnl-dev/src/TNL/Matrices/Multidiagonal_impl.h \n"; + std::cout << " Line: 329 \n"; + std::cout << " Diagnostics: Not supported with CUDA.\n"; + std::cout << " Assertion 'row >= 0 && row < this->getRows() && column >= 0 && column < this->getColumns()' failed !!! \n"; + std::cout << " File: /home/lukas/tnl-dev/src/TNL/Matrices/Multidiagonal_impl.h \n"; + std::cout << " Line: 329 \n"; + std::cout << " Diagnostics: Not supported with CUDA.\n"; + std::cout << " terminate called after throwing an instance of 'TNL::Exceptions::CudaRuntimeError'\n"; + std::cout << " what(): CUDA ERROR 4 (cudaErrorLaunchFailure): unspecified launch failure.\n"; + std::cout << " Source: line 57 in /home/lukas/tnl-dev/src/TNL/Containers/Algorithms/ArrayOperationsCuda_impl.h: unspecified launch failure\n"; + std::cout << " [1] 4003 abort (core dumped) ./MultidiagonalMatrixTest-dbg\n"; +} +#endif + +TEST( MultidiagonalMatrixTest, Multidiagonal_performSORIterationTest_Host ) +{ + test_PerformSORIteration< Multidiagonal_host_float >(); +} + +#ifdef HAVE_CUDA +TEST( MultidiagonalMatrixTest, Multidiagonal_performSORIterationTest_Cuda ) +{ +// test_PerformSORIteration< Multidiagonal_cuda_float >(); + bool testRan = false; + EXPECT_TRUE( testRan ); + std::cout << "\nTEST DID NOT RUN. 
NOT WORKING.\n\n"; + std::cout << "If launched, this test throws the following message: \n"; + std::cout << " [1] 6992 segmentation fault (core dumped) ./SparseMatrixTest-dbg\n\n"; + std::cout << "\n THIS IS NOT IMPLEMENTED FOR CUDA YET!!\n\n"; +} +#endif + * */ + +#endif // HAVE_GTEST + +#include "../main.h" diff --git a/src/UnitTests/Matrices/SparseMatrixCopyTest.h b/src/UnitTests/Matrices/SparseMatrixCopyTest.h index 9b09ef4d45cc4ee2e27fb582aa6f56e3de7e09b5..829c30677b2c7e3a0209ed72c01a991ffec56d1c 100644 --- a/src/UnitTests/Matrices/SparseMatrixCopyTest.h +++ b/src/UnitTests/Matrices/SparseMatrixCopyTest.h @@ -8,18 +8,34 @@ /* See Copyright Notice in tnl/Copyright */ -#include <TNL/Matrices/CSR.h> -#include <TNL/Matrices/Ellpack.h> -#include <TNL/Matrices/SlicedEllpack.h> - -using CSR_host = TNL::Matrices::CSR< int, TNL::Devices::Host, int >; -using CSR_cuda = TNL::Matrices::CSR< int, TNL::Devices::Cuda, int >; -using E_host = TNL::Matrices::Ellpack< int, TNL::Devices::Host, int >; -using E_cuda = TNL::Matrices::Ellpack< int, TNL::Devices::Cuda, int >; -using SE_host = TNL::Matrices::SlicedEllpack< int, TNL::Devices::Host, int, 2 >; -using SE_cuda = TNL::Matrices::SlicedEllpack< int, TNL::Devices::Cuda, int, 2 >; - -#ifdef HAVE_GTEST +#include <TNL/Matrices/Legacy/CSR.h> +#include <TNL/Matrices/Legacy/Ellpack.h> +#include <TNL/Matrices/Legacy/SlicedEllpack.h> + +#include <TNL/Matrices/SparseMatrix.h> +#include <TNL/Matrices/MatrixType.h> +#include <TNL/Matrices/Dense.h> +#include <TNL/Matrices/Tridiagonal.h> +#include <TNL/Matrices/Multidiagonal.h> +#include <TNL/Containers/Segments/CSR.h> +#include <TNL/Containers/Segments/Ellpack.h> +#include <TNL/Containers/Segments/SlicedEllpack.h> + +template< typename Device, typename Index, typename IndexAllocator > +using EllpackSegments = TNL::Containers::Segments::Ellpack< Device, Index, IndexAllocator >; + +template< typename Device, typename Index, typename IndexAllocator > +using SlicedEllpackSegments = TNL::Containers::Segments::SlicedEllpack< Device, Index, IndexAllocator >; + +using CSR_host = TNL::Matrices::SparseMatrix< int, TNL::Devices::Host, int, TNL::Matrices::GeneralMatrix, TNL::Containers::Segments::CSR >; +using CSR_cuda = TNL::Matrices::SparseMatrix< int, TNL::Devices::Cuda, int, TNL::Matrices::GeneralMatrix, TNL::Containers::Segments::CSR >; +using E_host = TNL::Matrices::SparseMatrix< int, TNL::Devices::Host, int, TNL::Matrices::GeneralMatrix, EllpackSegments >; +using E_cuda = TNL::Matrices::SparseMatrix< int, TNL::Devices::Cuda, int, TNL::Matrices::GeneralMatrix, EllpackSegments >; +using SE_host = TNL::Matrices::SparseMatrix< int, TNL::Devices::Host, int, TNL::Matrices::GeneralMatrix, SlicedEllpackSegments >; +using SE_cuda = TNL::Matrices::SparseMatrix< int, TNL::Devices::Cuda, int, TNL::Matrices::GeneralMatrix, SlicedEllpackSegments >; + + +#ifdef HAVE_GTEST #include <gtest/gtest.h> /* @@ -41,7 +57,6 @@ void setupUnevenRowSizeMatrix( Matrix& m ) { const int rows = 10; const int cols = 6; - m.reset(); m.setDimensions( rows, cols ); typename Matrix::CompressedRowLengthsVector rowLengths; rowLengths.setSize( rows ); @@ -78,7 +93,7 @@ void setupUnevenRowSizeMatrix( Matrix& m ) m.setElement( 7, 0, value++ ); // 7th row - for( int i = 0; i < cols - 1; i++ ) // 8th row + for( int i = 0; i < cols - 1; i++ ) // 8th row m.setElement( 8, i, value++ ); m.setElement( 9, 5, value++ ); // 9th row @@ -138,21 +153,21 @@ void checkUnevenRowSizeMatrix( Matrix& m ) EXPECT_EQ( m.getElement( 6, 3 ), 0 ); EXPECT_EQ( m.getElement( 6, 4 ), 0 ); 
EXPECT_EQ( m.getElement( 6, 5 ), 0 ); - + EXPECT_EQ( m.getElement( 7, 0 ), 22 ); EXPECT_EQ( m.getElement( 7, 1 ), 0 ); EXPECT_EQ( m.getElement( 7, 2 ), 0 ); EXPECT_EQ( m.getElement( 7, 3 ), 0 ); EXPECT_EQ( m.getElement( 7, 4 ), 0 ); EXPECT_EQ( m.getElement( 7, 5 ), 0 ); - + EXPECT_EQ( m.getElement( 8, 0 ), 23 ); EXPECT_EQ( m.getElement( 8, 1 ), 24 ); EXPECT_EQ( m.getElement( 8, 2 ), 25 ); EXPECT_EQ( m.getElement( 8, 3 ), 26 ); EXPECT_EQ( m.getElement( 8, 4 ), 27 ); EXPECT_EQ( m.getElement( 8, 5 ), 0 ); - + EXPECT_EQ( m.getElement( 9, 0 ), 0 ); EXPECT_EQ( m.getElement( 9, 1 ), 0 ); EXPECT_EQ( m.getElement( 9, 2 ), 0 ); @@ -185,7 +200,7 @@ void setupAntiTriDiagMatrix( Matrix& m ) rowLengths.setElement( 0, 4); rowLengths.setElement( 1, 4 ); m.setCompressedRowLengths( rowLengths ); - + int value = 1; for( int i = 0; i < rows; i++ ) for( int j = cols - 1; j > 2; j-- ) @@ -341,81 +356,233 @@ template< typename Matrix1, typename Matrix2 > void testCopyAssignment() { { - SCOPED_TRACE("Tri Diagonal Matrix"); - - Matrix1 triDiag1; - setupTriDiagMatrix( triDiag1 ); - checkTriDiagMatrix( triDiag1 ); - - Matrix2 triDiag2; - triDiag2 = triDiag1; - checkTriDiagMatrix( triDiag2 ); + SCOPED_TRACE("Tri Diagonal Matrix"); + + Matrix1 triDiag1; + setupTriDiagMatrix( triDiag1 ); + checkTriDiagMatrix( triDiag1 ); + + Matrix2 triDiag2; + triDiag2 = triDiag1; + checkTriDiagMatrix( triDiag1 ); + checkTriDiagMatrix( triDiag2 ); } - { - SCOPED_TRACE("Anti Tri Diagonal Matrix"); - - Matrix1 antiTriDiag1; - setupAntiTriDiagMatrix( antiTriDiag1 ); - checkAntiTriDiagMatrix( antiTriDiag1 ); - - Matrix2 antiTriDiag2; - antiTriDiag2 = antiTriDiag1; - checkAntiTriDiagMatrix( antiTriDiag2 ); + SCOPED_TRACE("Anti Tri Diagonal Matrix"); + Matrix1 antiTriDiag1; + setupAntiTriDiagMatrix( antiTriDiag1 ); + checkAntiTriDiagMatrix( antiTriDiag1 ); + + Matrix2 antiTriDiag2; + antiTriDiag2 = antiTriDiag1; + checkAntiTriDiagMatrix( antiTriDiag2 ); } - { - SCOPED_TRACE("Uneven Row Size Matrix"); - Matrix1 unevenRowSize1; - setupUnevenRowSizeMatrix( unevenRowSize1 ); - checkUnevenRowSizeMatrix( unevenRowSize1 ); - - Matrix2 unevenRowSize2; - unevenRowSize2 = unevenRowSize1; - checkUnevenRowSizeMatrix( unevenRowSize2 ); + SCOPED_TRACE("Uneven Row Size Matrix"); + Matrix1 unevenRowSize1; + setupUnevenRowSizeMatrix( unevenRowSize1 ); + checkUnevenRowSizeMatrix( unevenRowSize1 ); + + Matrix2 unevenRowSize2; + unevenRowSize2 = unevenRowSize1; + + checkUnevenRowSizeMatrix( unevenRowSize2 ); } } template< typename Matrix1, typename Matrix2 > void testConversion() { - { SCOPED_TRACE("Tri Diagonal Matrix"); - + Matrix1 triDiag1; setupTriDiagMatrix( triDiag1 ); checkTriDiagMatrix( triDiag1 ); - + Matrix2 triDiag2; - TNL::Matrices::copySparseMatrix( triDiag2, triDiag1 ); + triDiag2 = triDiag1; checkTriDiagMatrix( triDiag2 ); } - + { SCOPED_TRACE("Anti Tri Diagonal Matrix"); - + Matrix1 antiTriDiag1; setupAntiTriDiagMatrix( antiTriDiag1 ); checkAntiTriDiagMatrix( antiTriDiag1 ); - + Matrix2 antiTriDiag2; - TNL::Matrices::copySparseMatrix( antiTriDiag2, antiTriDiag1 ); + antiTriDiag2 = antiTriDiag1; checkAntiTriDiagMatrix( antiTriDiag2 ); } - + { SCOPED_TRACE("Uneven Row Size Matrix"); Matrix1 unevenRowSize1; setupUnevenRowSizeMatrix( unevenRowSize1 ); checkUnevenRowSizeMatrix( unevenRowSize1 ); - + Matrix2 unevenRowSize2; - TNL::Matrices::copySparseMatrix( unevenRowSize2, unevenRowSize1 ); + unevenRowSize2 = unevenRowSize1; checkUnevenRowSizeMatrix( unevenRowSize2 ); } } +template< typename Matrix > +void tridiagonalMatrixAssignment() +{ + using 
RealType = typename Matrix::RealType; + using DeviceType = typename Matrix::DeviceType; + using IndexType = typename Matrix::IndexType; + + using TridiagonalHost = TNL::Matrices::Tridiagonal< RealType, TNL::Devices::Host, IndexType >; + using TridiagonalCuda = TNL::Matrices::Tridiagonal< RealType, TNL::Devices::Cuda, IndexType >; + + const IndexType rows( 10 ), columns( 10 ); + TridiagonalHost hostMatrix( rows, columns ); + for( IndexType i = 0; i < rows; i++ ) + for( IndexType j = TNL::max( 0, i - 1 ); j < TNL::min( columns, i + 2 ); j++ ) + hostMatrix.setElement( i, j, i + j ); + + Matrix matrix; + matrix = hostMatrix; + using RowCapacitiesType = typename Matrix::RowsCapacitiesType; + RowCapacitiesType rowCapacities; + matrix.getCompressedRowLengths( rowCapacities ); + RowCapacitiesType exactRowLengths{ 1, 3, 3, 3, 3, 3, 3, 3, 3, 2 }; + + EXPECT_EQ( rowCapacities, exactRowLengths ); + for( IndexType i = 0; i < rows; i++ ) + for( IndexType j = 0; j < columns; j++ ) + { + if( abs( i - j ) > 1 ) + EXPECT_EQ( matrix.getElement( i, j ), 0.0 ); + else + EXPECT_EQ( matrix.getElement( i, j ), i + j ); + } + +#ifdef HAVE_CUDA + TridiagonalCuda cudaMatrix( rows, columns ); + cudaMatrix = hostMatrix; + matrix = cudaMatrix; + matrix.getCompressedRowLengths( rowCapacities ); + EXPECT_EQ( rowCapacities, exactRowLengths ); + for( IndexType i = 0; i < rows; i++ ) + for( IndexType j = 0; j < columns; j++ ) + { + if( abs( i - j ) > 1 ) + EXPECT_EQ( matrix.getElement( i, j ), 0.0 ); + else + EXPECT_EQ( matrix.getElement( i, j ), i + j ); + } +#endif +} + +template< typename Matrix > +void multidiagonalMatrixAssignment() +{ + using RealType = typename Matrix::RealType; + using DeviceType = typename Matrix::DeviceType; + using IndexType = typename Matrix::IndexType; + + using MultidiagonalHost = TNL::Matrices::Multidiagonal< RealType, TNL::Devices::Host, IndexType >; + using MultidiagonalCuda = TNL::Matrices::Multidiagonal< RealType, TNL::Devices::Cuda, IndexType >; + using DiagonalsShiftsType = typename MultidiagonalHost::DiagonalsShiftsType; + DiagonalsShiftsType diagonals{ -4, -2, 0, 1, 3, 5 }; + + const IndexType rows( 10 ), columns( 10 ); + MultidiagonalHost hostMatrix( rows, columns, diagonals ); + for( IndexType i = 0; i < rows; i++ ) + for( IndexType j = 0; j < columns; j++ ) + if( diagonals.containsValue( j - i ) ) + hostMatrix.setElement( i, j, i + j ); + + Matrix matrix; + matrix = hostMatrix; + using RowCapacitiesType = typename Matrix::RowsCapacitiesType; + RowCapacitiesType rowCapacities; + matrix.getCompressedRowLengths( rowCapacities ); + RowCapacitiesType exactRowLengths{ 3, 4, 5, 5, 6, 5, 5, 4, 4, 3 }; + /*std::cerr << "hostMatrix " << hostMatrix << std::endl; + std::cerr << "matrix " << matrix << std::endl; + std::cerr << "rowCapacities " << rowCapacities << std::endl;*/ + + EXPECT_EQ( rowCapacities, exactRowLengths ); + for( IndexType i = 0; i < rows; i++ ) + for( IndexType j = 0; j < columns; j++ ) + { + if( diagonals.containsValue( j - i ) ) + EXPECT_EQ( matrix.getElement( i, j ), i + j ); + else + EXPECT_EQ( matrix.getElement( i, j ), 0.0 ); + } + +#ifdef HAVE_CUDA + MultidiagonalCuda cudaMatrix( rows, columns, diagonals ); + cudaMatrix = hostMatrix; + matrix = cudaMatrix; + matrix.getCompressedRowLengths( rowCapacities ); + EXPECT_EQ( rowCapacities, exactRowLengths ); + for( IndexType i = 0; i < rows; i++ ) + for( IndexType j = 0; j < columns; j++ ) + { + if( diagonals.containsValue( j - i ) ) + EXPECT_EQ( matrix.getElement( i, j ), i + j ); + else + EXPECT_EQ( matrix.getElement( i, j 
), 0.0 ); + } +#endif +} + +template< typename Matrix > +void denseMatrixAssignment() +{ + using RealType = typename Matrix::RealType; + using DeviceType = typename Matrix::DeviceType; + using IndexType = typename Matrix::IndexType; + + using DenseHost = TNL::Matrices::Dense< RealType, TNL::Devices::Host, IndexType >; + using DenseCuda = TNL::Matrices::Dense< RealType, TNL::Devices::Cuda, IndexType >; + + const IndexType rows( 10 ), columns( 10 ); + DenseHost hostMatrix( rows, columns ); + for( IndexType i = 0; i < columns; i++ ) + for( IndexType j = 0; j <= i; j++ ) + hostMatrix( i, j ) = i + j; + + Matrix matrix; + matrix = hostMatrix; + using RowCapacitiesType = typename Matrix::RowsCapacitiesType; + RowCapacitiesType rowCapacities; + matrix.getCompressedRowLengths( rowCapacities ); + RowCapacitiesType exactRowLengths{ 0, 2, 3, 4, 5, 6, 7, 8, 9, 10 }; + EXPECT_EQ( rowCapacities, exactRowLengths ); + for( IndexType i = 0; i < columns; i++ ) + for( IndexType j = 0; j < rows; j++ ) + { + if( j > i ) + EXPECT_EQ( matrix.getElement( i, j ), 0.0 ); + else + EXPECT_EQ( matrix.getElement( i, j ), i + j ); + } + +#ifdef HAVE_CUDA + DenseCuda cudaMatrix( rows, columns ); + cudaMatrix = hostMatrix; + matrix = cudaMatrix; + matrix.getCompressedRowLengths( rowCapacities ); + EXPECT_EQ( rowCapacities, exactRowLengths ); + for( IndexType i = 0; i < columns; i++ ) + for( IndexType j = 0; j < rows; j++ ) + { + if( j > i ) + EXPECT_EQ( matrix.getElement( i, j ), 0.0 ); + else + EXPECT_EQ( matrix.getElement( i, j ), i + j ); + } +#endif +} TEST( SparseMatrixCopyTest, CSR_HostToHost ) { @@ -485,8 +652,8 @@ TEST( SparseMatrixCopyTest, SlicedEllpack_CudaToCuda ) } #endif - -// test conversion between formats +//// +// Test of conversion between formats TEST( SparseMatrixCopyTest, CSR_to_Ellpack_host ) { testConversion< CSR_host, E_host >(); @@ -549,6 +716,108 @@ TEST( SparseMatrixCopyTest, SlicedEllpack_to_Ellpack_cuda ) } #endif -#endif +//// +// Tridiagonal matrix assignment test +TEST( SparseMatrixCopyTest, TridiagonalMatrixAssignment_to_CSR_host ) +{ + tridiagonalMatrixAssignment< CSR_host >(); +} + +TEST( SparseMatrixCopyTest, TridiagonalMatrixAssignment_to_Ellpack_host ) +{ + tridiagonalMatrixAssignment< E_host >(); +} + +TEST( SparseMatrixCopyTest, TridiagonalMatrixAssignment_to_SlicedEllpack_host ) +{ + tridiagonalMatrixAssignment< SE_host >(); +} + +#ifdef HAVE_CUDA +TEST( SparseMatrixCopyTest, TridiagonalMatrixAssignment_to_CSR_cuda ) +{ + tridiagonalMatrixAssignment< CSR_cuda >(); +} + +TEST( SparseMatrixCopyTest, TridiagonalMatrixAssignment_to_Ellpack_cuda ) +{ + tridiagonalMatrixAssignment< E_cuda >(); +} + +TEST( SparseMatrixCopyTest, TridiagonalMatrixAssignment_to_SlicedEllpack_cuda ) +{ + tridiagonalMatrixAssignment< SE_cuda >(); +} +#endif // HAVE_CUDA + +//// +// Multidiagonal matrix assignment test +TEST( SparseMatrixCopyTest, MultidiagonalMatrixAssignment_to_CSR_host ) +{ + multidiagonalMatrixAssignment< CSR_host >(); +} + +TEST( SparseMatrixCopyTest, MultidiagonalMatrixAssignment_to_Ellpack_host ) +{ + multidiagonalMatrixAssignment< E_host >(); +} + +TEST( SparseMatrixCopyTest, MultidiagonalMatrixAssignment_to_SlicedEllpack_host ) +{ + multidiagonalMatrixAssignment< SE_host >(); +} + +#ifdef HAVE_CUDA +TEST( SparseMatrixCopyTest, MultidiagonalMatrixAssignment_to_CSR_cuda ) +{ + multidiagonalMatrixAssignment< CSR_cuda >(); +} + +TEST( SparseMatrixCopyTest, MultidiagonalMatrixAssignment_to_Ellpack_cuda ) +{ + multidiagonalMatrixAssignment< E_cuda >(); +} + +TEST( SparseMatrixCopyTest, 
MultidiagonalMatrixAssignment_to_SlicedEllpack_cuda ) +{ + multidiagonalMatrixAssignment< SE_cuda >(); +} +#endif // HAVE_CUDA + +//// +// Dense matrix assignment test +TEST( SparseMatrixCopyTest, DenseMatrixAssignment_to_CSR_host ) +{ + denseMatrixAssignment< CSR_host >(); +} + +TEST( SparseMatrixCopyTest, DenseMatrixAssignment_to_Ellpack_host ) +{ + denseMatrixAssignment< E_host >(); +} + +TEST( SparseMatrixCopyTest, DenseMatrixAssignment_to_SlicedEllpack_host ) +{ + denseMatrixAssignment< SE_host >(); +} + +#ifdef HAVE_CUDA +TEST( SparseMatrixCopyTest, DenseMatrixAssignment_to_CSR_cuda ) +{ + denseMatrixAssignment< CSR_cuda >(); +} + +TEST( SparseMatrixCopyTest, DenseMatrixAssignment_to_Ellpack_cuda ) +{ + denseMatrixAssignment< E_cuda >(); +} + +TEST( SparseMatrixCopyTest, DenseMatrixAssignment_to_SlicedEllpack_cuda ) +{ + denseMatrixAssignment< SE_cuda >(); +} +#endif // HAVE_CUDA + +#endif //HAVE_GTEST #include "../main.h" diff --git a/src/UnitTests/Matrices/SparseMatrixTest.h b/src/UnitTests/Matrices/SparseMatrixTest.h index 5baeb42791a526731277adfaa20715a533ab956c..30d3a692d1a843e90600bffa560314535762e7ad 100644 --- a/src/UnitTests/Matrices/SparseMatrixTest.h +++ b/src/UnitTests/Matrices/SparseMatrixTest.h @@ -1,39 +1,1405 @@ /*************************************************************************** SparseMatrixTest.h - description ------------------- - begin : Nov 2, 2018 + begin : Nov 22, 2018 copyright : (C) 2018 by Tomas Oberhuber et al. email : tomas.oberhuber@fjfi.cvut.cz ***************************************************************************/ /* See Copyright Notice in tnl/Copyright */ -#include <TNL/Matrices/CSR.h> - -#include "SparseMatrixTest.hpp" +#include <TNL/Containers/Vector.h> +#include <TNL/Containers/VectorView.h> +#include <TNL/Math.h> +#include <TNL/Algorithms/ParallelFor.h> #include <iostream> +#include <sstream> -#ifdef HAVE_GTEST +#ifdef HAVE_GTEST #include <gtest/gtest.h> -using CSR_host_float = TNL::Matrices::CSR< float, TNL::Devices::Host, int >; -using CSR_host_int = TNL::Matrices::CSR< int, TNL::Devices::Host, int >; +template< typename MatrixHostFloat, typename MatrixHostInt > +void host_test_GetType() +{ + bool testRan = false; + EXPECT_TRUE( testRan ); + std::cout << "\nTEST DID NOT RUN. NOT WORKING.\n\n"; + std::cerr << "This test has not been implemented properly yet.\n" << std::endl; +} -using CSR_cuda_float = TNL::Matrices::CSR< float, TNL::Devices::Cuda, int >; -using CSR_cuda_int = TNL::Matrices::CSR< int, TNL::Devices::Cuda, int >; +template< typename MatrixCudaFloat, typename MatrixCudaInt > +void cuda_test_GetType() +{ + bool testRan = false; + EXPECT_TRUE( testRan ); + std::cout << "\nTEST DID NOT RUN. 
NOT WORKING.\n\n"; + std::cerr << "This test has not been implemented properly yet.\n" << std::endl; +} -TEST( SparseMatrixTest, CSR_perforSORIterationTest_Host ) +template< typename Matrix > +void test_Constructors() { - test_PerformSORIteration< CSR_host_float >(); + using RealType = typename Matrix::RealType; + using DeviceType = typename Matrix::DeviceType; + using IndexType = typename Matrix::IndexType; + + Matrix m1( 5, 6 ); + EXPECT_EQ( m1.getRows(), 5 ); + EXPECT_EQ( m1.getColumns(), 6 ); + + Matrix m2( {1, 2, 2, 2, 1 }, 5 ); + typename Matrix::RowsCapacitiesType v1, v2{ 1, 2, 2, 2, 1 }; + m2.setElement( 0, 0, 1 ); // 0th row + m2.setElement( 1, 0, 1 ); // 1st row + m2.setElement( 1, 1, 1 ); + m2.setElement( 2, 1, 1 ); // 2nd row + m2.setElement( 2, 2, 1 ); + m2.setElement( 3, 2, 1 ); // 3rd row + m2.setElement( 3, 3, 1 ); + m2.setElement( 4, 4, 1 ); // 4th row + m2.getCompressedRowLengths( v1 ); + + EXPECT_EQ( v1, v2 ); + + /* + * Sets up the following 6x5 sparse matrix: + * + * / 1 2 3 0 0 \ + * | 0 4 5 6 0 | + * | 0 0 7 8 9 | + * | 10 0 0 0 0 | + * | 0 11 0 0 0 | + * \ 0 0 0 12 0 / + */ + + Matrix m3( 6, 5, { + { 0, 0, 1.0 }, { 0, 1, 2.0 }, { 0, 2, 3.0 }, + { 1, 1, 4.0 }, { 1, 2, 5.0 }, { 1, 3, 6.0 }, + { 2, 2, 7.0 }, { 2, 3, 8.0 }, { 2, 4, 9.0 }, + { 3, 0, 10.0 }, + { 4, 1, 11.0 }, + { 5, 3, 12.0 } } ); + + // Check the set elements + EXPECT_EQ( m3.getElement( 0, 0 ), 1 ); + EXPECT_EQ( m3.getElement( 0, 1 ), 2 ); + EXPECT_EQ( m3.getElement( 0, 2 ), 3 ); + EXPECT_EQ( m3.getElement( 0, 3 ), 0 ); + EXPECT_EQ( m3.getElement( 0, 4 ), 0 ); + + EXPECT_EQ( m3.getElement( 1, 0 ), 0 ); + EXPECT_EQ( m3.getElement( 1, 1 ), 4 ); + EXPECT_EQ( m3.getElement( 1, 2 ), 5 ); + EXPECT_EQ( m3.getElement( 1, 3 ), 6 ); + EXPECT_EQ( m3.getElement( 1, 4 ), 0 ); + + EXPECT_EQ( m3.getElement( 2, 0 ), 0 ); + EXPECT_EQ( m3.getElement( 2, 1 ), 0 ); + EXPECT_EQ( m3.getElement( 2, 2 ), 7 ); + EXPECT_EQ( m3.getElement( 2, 3 ), 8 ); + EXPECT_EQ( m3.getElement( 2, 4 ), 9 ); + + EXPECT_EQ( m3.getElement( 3, 0 ), 10 ); + EXPECT_EQ( m3.getElement( 3, 1 ), 0 ); + EXPECT_EQ( m3.getElement( 3, 2 ), 0 ); + EXPECT_EQ( m3.getElement( 3, 3 ), 0 ); + EXPECT_EQ( m3.getElement( 3, 4 ), 0 ); + + EXPECT_EQ( m3.getElement( 4, 0 ), 0 ); + EXPECT_EQ( m3.getElement( 4, 1 ), 11 ); + EXPECT_EQ( m3.getElement( 4, 2 ), 0 ); + EXPECT_EQ( m3.getElement( 4, 3 ), 0 ); + EXPECT_EQ( m3.getElement( 4, 4 ), 0 ); + + EXPECT_EQ( m3.getElement( 5, 0 ), 0 ); + EXPECT_EQ( m3.getElement( 5, 1 ), 0 ); + EXPECT_EQ( m3.getElement( 5, 2 ), 0 ); + EXPECT_EQ( m3.getElement( 5, 3 ), 12 ); + EXPECT_EQ( m3.getElement( 5, 4 ), 0 ); } -#ifdef HAVE_CUDA -TEST( SparseMatrixTest, CSR_perforSORIterationTest_Cuda ) +template< typename Matrix > +void test_SetDimensions() { - // test_PerformSORIteration< CSR_cuda_float >(); + using RealType = typename Matrix::RealType; + using DeviceType = typename Matrix::DeviceType; + using IndexType = typename Matrix::IndexType; + + const IndexType rows = 9; + const IndexType cols = 8; + + Matrix m; + m.setDimensions( rows, cols ); + + EXPECT_EQ( m.getRows(), 9 ); + EXPECT_EQ( m.getColumns(), 8 ); } -#endif -#endif +template< typename Matrix > +void test_SetCompressedRowLengths() +{ + using RealType = typename Matrix::RealType; + using DeviceType = typename Matrix::DeviceType; + using IndexType = typename Matrix::IndexType; + + const IndexType rows = 10; + const IndexType cols = 11; + + Matrix m( rows, cols ); + typename Matrix::CompressedRowLengthsVector rowLengths( rows, 3 ); + + IndexType rowLength = 1; + for( IndexType i = 2; 
i < rows; i++ ) + rowLengths.setElement( i, rowLength++ ); + + m.setCompressedRowLengths( rowLengths ); + + // Insert values into the rows. + RealType value = 1; + + for( IndexType i = 0; i < 3; i++ ) // 0th row + m.setElement( 0, i, value++ ); + + for( IndexType i = 0; i < 3; i++ ) // 1st row + m.setElement( 1, i, value++ ); + + for( IndexType i = 0; i < 1; i++ ) // 2nd row + m.setElement( 2, i, value++ ); + + for( IndexType i = 0; i < 2; i++ ) // 3rd row + m.setElement( 3, i, value++ ); + + for( IndexType i = 0; i < 3; i++ ) // 4th row + m.setElement( 4, i, value++ ); + + for( IndexType i = 0; i < 4; i++ ) // 5th row + m.setElement( 5, i, value++ ); + + for( IndexType i = 0; i < 5; i++ ) // 6th row + m.setElement( 6, i, value++ ); + + for( IndexType i = 0; i < 6; i++ ) // 7th row + m.setElement( 7, i, value++ ); + + for( IndexType i = 0; i < 7; i++ ) // 8th row + m.setElement( 8, i, value++ ); + + for( IndexType i = 0; i < 8; i++ ) // 9th row + m.setElement( 9, i, value++ ); + + rowLengths = 0; + m.getCompressedRowLengths( rowLengths ); + typename Matrix::CompressedRowLengthsVector correctRowLengths{ 3, 3, 1, 2, 3, 4, 5, 6, 7, 8 }; + EXPECT_EQ( rowLengths, correctRowLengths ); +} + +template< typename Matrix1, typename Matrix2 > +void test_SetLike() +{ + using RealType = typename Matrix1::RealType; + using DeviceType = typename Matrix1::DeviceType; + using IndexType = typename Matrix1::IndexType; + + const IndexType rows = 8; + const IndexType cols = 7; + + Matrix1 m1( rows + 1, cols + 2 ); + Matrix2 m2( rows, cols ); + + m1.setLike( m2 ); + + EXPECT_EQ( m1.getRows(), m2.getRows() ); + EXPECT_EQ( m1.getColumns(), m2.getColumns() ); +} + +template< typename Matrix > +void test_GetNumberOfNonzeroMatrixElements() +{ + using RealType = typename Matrix::RealType; + using DeviceType = typename Matrix::DeviceType; + using IndexType = typename Matrix::IndexType; + + /* + * Sets up the following 10x10 sparse matrix: + * + * / 1 0 2 0 3 0 4 0 0 0 \ + * | 5 6 7 0 0 0 0 0 0 0 | + * | 8 9 10 11 12 13 14 15 0 0 | + * | 16 17 0 0 0 0 0 0 0 0 | + * | 18 0 0 0 0 0 0 0 0 0 | + * | 19 0 0 0 0 0 0 0 0 0 | + * | 20 0 0 0 0 0 0 0 0 0 | + * | 21 0 0 0 0 0 0 0 0 0 | + * | 22 23 24 25 26 27 28 29 30 31 | + * \ 32 33 34 35 36 37 38 39 40 41 / + */ + + const IndexType rows = 10; + const IndexType cols = 10; + + Matrix m( rows, cols ); + + typename Matrix::CompressedRowLengthsVector rowLengths{ 4, 3, 8, 2, 1, 1, 1, 1, 10, 10 }; + m.setCompressedRowLengths( rowLengths ); + + RealType value = 1; + for( IndexType i = 0; i < 4; i++ ) + m.setElement( 0, 2 * i, value++ ); + + for( IndexType i = 0; i < 3; i++ ) + m.setElement( 1, i, value++ ); + + for( IndexType i = 0; i < 8; i++ ) + m.setElement( 2, i, value++ ); + + for( IndexType i = 0; i < 2; i++ ) + m.setElement( 3, i, value++ ); + + for( IndexType i = 4; i < 8; i++ ) + m.setElement( i, 0, value++ ); + + for( IndexType j = 8; j < rows; j++) + for( IndexType i = 0; i < cols; i++ ) + m.setElement( j, i, value++ ); + + EXPECT_EQ( m.getNumberOfNonzeroMatrixElements(), 41 ); +} + +template< typename Matrix > +void test_Reset() +{ + using RealType = typename Matrix::RealType; + using DeviceType = typename Matrix::DeviceType; + using IndexType = typename Matrix::IndexType; + + /* + * Sets up the following 5x4 sparse matrix: + * + * / 0 0 0 0 \ + * | 0 0 0 0 | + * | 0 0 0 0 | + * | 0 0 0 0 | + * \ 0 0 0 0 / + */ + + const IndexType rows = 5; + const IndexType cols = 4; + + Matrix m( rows, cols ); + m.reset(); + + EXPECT_EQ( m.getRows(), 0 ); + EXPECT_EQ( m.getColumns(), 0 ); 
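+   // after reset() the matrix is expected to report zero rows and zero columns.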
+} + +template< typename Matrix > +void test_GetRow() +{ + using RealType = typename Matrix::RealType; + using DeviceType = typename Matrix::DeviceType; + using IndexType = typename Matrix::IndexType; + + /* + * Sets up the following 10x10 sparse matrix: + * + * / 1 0 2 0 3 0 4 0 0 0 \ + * | 5 6 7 0 0 0 0 0 0 0 | + * | 8 9 10 11 12 13 14 15 0 0 | + * | 16 17 0 0 0 0 0 0 0 0 | + * | 18 0 0 0 0 0 0 0 0 0 | + * | 19 0 0 0 0 0 0 0 0 0 | + * | 20 0 0 0 0 0 0 0 0 0 | + * | 21 0 0 0 0 0 0 0 0 0 | + * | 22 23 24 25 26 27 28 29 30 31 | + * \ 32 33 34 35 36 37 38 39 40 41 / + */ + + const IndexType rows = 10; + const IndexType cols = 10; + + Matrix m( rows, cols ); + + typename Matrix::CompressedRowLengthsVector rowLengths{ 4, 3, 8, 2, 1, 1, 1, 1, 10, 10 }; + m.setCompressedRowLengths( rowLengths ); + + auto matrixView = m.getView(); + auto f = [=] __cuda_callable__ ( const IndexType rowIdx ) mutable { + auto row = matrixView.getRow( rowIdx ); + RealType val; + switch( rowIdx ) + { + case 0: + val = 1; + for( IndexType i = 0; i < 4; i++ ) + row.setElement( i, 2 * i, val++ ); + break; + case 1: + val = 5; + for( IndexType i = 0; i < 3; i++ ) + row.setElement( i, i, val++ ); + break; + case 2: + val = 8; + for( IndexType i = 0; i < 8; i++ ) + row.setElement( i, i, val++ ); + break; + case 3: + val = 16; + for( IndexType i = 0; i < 2; i++ ) + row.setElement( i, i, val++ ); + break; + case 4: + row.setElement( 0, 0, 18 ); + break; + case 5: + row.setElement( 0, 0, 19 ); + break; + case 6: + row.setElement( 0, 0, 20 ); + break; + case 7: + row.setElement( 0, 0, 21 ); + break; + case 8: + val = 22; + for( IndexType i = 0; i < rows; i++ ) + row.setElement( i, i, val++ ); + break; + case 9: + val = 32; + for( IndexType i = 0; i < rows; i++ ) + row.setElement( i, i, val++ ); + break; + } + }; + TNL::Algorithms::ParallelFor< DeviceType >::exec( ( IndexType ) 0, rows, f ); + + EXPECT_EQ( m.getElement( 0, 0 ), 1 ); + EXPECT_EQ( m.getElement( 0, 1 ), 0 ); + EXPECT_EQ( m.getElement( 0, 2 ), 2 ); + EXPECT_EQ( m.getElement( 0, 3 ), 0 ); + EXPECT_EQ( m.getElement( 0, 4 ), 3 ); + EXPECT_EQ( m.getElement( 0, 5 ), 0 ); + EXPECT_EQ( m.getElement( 0, 6 ), 4 ); + EXPECT_EQ( m.getElement( 0, 7 ), 0 ); + EXPECT_EQ( m.getElement( 0, 8 ), 0 ); + EXPECT_EQ( m.getElement( 0, 9 ), 0 ); + + EXPECT_EQ( m.getElement( 1, 0 ), 5 ); + EXPECT_EQ( m.getElement( 1, 1 ), 6 ); + EXPECT_EQ( m.getElement( 1, 2 ), 7 ); + EXPECT_EQ( m.getElement( 1, 3 ), 0 ); + EXPECT_EQ( m.getElement( 1, 4 ), 0 ); + EXPECT_EQ( m.getElement( 1, 5 ), 0 ); + EXPECT_EQ( m.getElement( 1, 6 ), 0 ); + EXPECT_EQ( m.getElement( 1, 7 ), 0 ); + EXPECT_EQ( m.getElement( 1, 8 ), 0 ); + EXPECT_EQ( m.getElement( 1, 9 ), 0 ); + + EXPECT_EQ( m.getElement( 2, 0 ), 8 ); + EXPECT_EQ( m.getElement( 2, 1 ), 9 ); + EXPECT_EQ( m.getElement( 2, 2 ), 10 ); + EXPECT_EQ( m.getElement( 2, 3 ), 11 ); + EXPECT_EQ( m.getElement( 2, 4 ), 12 ); + EXPECT_EQ( m.getElement( 2, 5 ), 13 ); + EXPECT_EQ( m.getElement( 2, 6 ), 14 ); + EXPECT_EQ( m.getElement( 2, 7 ), 15 ); + EXPECT_EQ( m.getElement( 2, 8 ), 0 ); + EXPECT_EQ( m.getElement( 2, 9 ), 0 ); + + EXPECT_EQ( m.getElement( 3, 0 ), 16 ); + EXPECT_EQ( m.getElement( 3, 1 ), 17 ); + EXPECT_EQ( m.getElement( 3, 2 ), 0 ); + EXPECT_EQ( m.getElement( 3, 3 ), 0 ); + EXPECT_EQ( m.getElement( 3, 4 ), 0 ); + EXPECT_EQ( m.getElement( 3, 5 ), 0 ); + EXPECT_EQ( m.getElement( 3, 6 ), 0 ); + EXPECT_EQ( m.getElement( 3, 7 ), 0 ); + EXPECT_EQ( m.getElement( 3, 8 ), 0 ); + EXPECT_EQ( m.getElement( 3, 9 ), 0 ); + + EXPECT_EQ( m.getElement( 4, 0 ), 18 ); + 
EXPECT_EQ( m.getElement( 4, 1 ), 0 ); + EXPECT_EQ( m.getElement( 4, 2 ), 0 ); + EXPECT_EQ( m.getElement( 4, 3 ), 0 ); + EXPECT_EQ( m.getElement( 4, 4 ), 0 ); + EXPECT_EQ( m.getElement( 4, 5 ), 0 ); + EXPECT_EQ( m.getElement( 4, 6 ), 0 ); + EXPECT_EQ( m.getElement( 4, 7 ), 0 ); + EXPECT_EQ( m.getElement( 4, 8 ), 0 ); + EXPECT_EQ( m.getElement( 4, 9 ), 0 ); + + EXPECT_EQ( m.getElement( 5, 0 ), 19 ); + EXPECT_EQ( m.getElement( 5, 1 ), 0 ); + EXPECT_EQ( m.getElement( 5, 2 ), 0 ); + EXPECT_EQ( m.getElement( 5, 3 ), 0 ); + EXPECT_EQ( m.getElement( 5, 4 ), 0 ); + EXPECT_EQ( m.getElement( 5, 5 ), 0 ); + EXPECT_EQ( m.getElement( 5, 6 ), 0 ); + EXPECT_EQ( m.getElement( 5, 7 ), 0 ); + EXPECT_EQ( m.getElement( 5, 8 ), 0 ); + EXPECT_EQ( m.getElement( 5, 9 ), 0 ); + + EXPECT_EQ( m.getElement( 6, 0 ), 20 ); + EXPECT_EQ( m.getElement( 6, 1 ), 0 ); + EXPECT_EQ( m.getElement( 6, 2 ), 0 ); + EXPECT_EQ( m.getElement( 6, 3 ), 0 ); + EXPECT_EQ( m.getElement( 6, 4 ), 0 ); + EXPECT_EQ( m.getElement( 6, 5 ), 0 ); + EXPECT_EQ( m.getElement( 6, 6 ), 0 ); + EXPECT_EQ( m.getElement( 6, 7 ), 0 ); + EXPECT_EQ( m.getElement( 6, 8 ), 0 ); + EXPECT_EQ( m.getElement( 6, 9 ), 0 ); + + EXPECT_EQ( m.getElement( 7, 0 ), 21 ); + EXPECT_EQ( m.getElement( 7, 1 ), 0 ); + EXPECT_EQ( m.getElement( 7, 2 ), 0 ); + EXPECT_EQ( m.getElement( 7, 3 ), 0 ); + EXPECT_EQ( m.getElement( 7, 4 ), 0 ); + EXPECT_EQ( m.getElement( 7, 5 ), 0 ); + EXPECT_EQ( m.getElement( 7, 6 ), 0 ); + EXPECT_EQ( m.getElement( 7, 7 ), 0 ); + EXPECT_EQ( m.getElement( 7, 8 ), 0 ); + EXPECT_EQ( m.getElement( 7, 9 ), 0 ); + + EXPECT_EQ( m.getElement( 8, 0 ), 22 ); + EXPECT_EQ( m.getElement( 8, 1 ), 23 ); + EXPECT_EQ( m.getElement( 8, 2 ), 24 ); + EXPECT_EQ( m.getElement( 8, 3 ), 25 ); + EXPECT_EQ( m.getElement( 8, 4 ), 26 ); + EXPECT_EQ( m.getElement( 8, 5 ), 27 ); + EXPECT_EQ( m.getElement( 8, 6 ), 28 ); + EXPECT_EQ( m.getElement( 8, 7 ), 29 ); + EXPECT_EQ( m.getElement( 8, 8 ), 30 ); + EXPECT_EQ( m.getElement( 8, 9 ), 31 ); + + EXPECT_EQ( m.getElement( 9, 0 ), 32 ); + EXPECT_EQ( m.getElement( 9, 1 ), 33 ); + EXPECT_EQ( m.getElement( 9, 2 ), 34 ); + EXPECT_EQ( m.getElement( 9, 3 ), 35 ); + EXPECT_EQ( m.getElement( 9, 4 ), 36 ); + EXPECT_EQ( m.getElement( 9, 5 ), 37 ); + EXPECT_EQ( m.getElement( 9, 6 ), 38 ); + EXPECT_EQ( m.getElement( 9, 7 ), 39 ); + EXPECT_EQ( m.getElement( 9, 8 ), 40 ); + EXPECT_EQ( m.getElement( 9, 9 ), 41 ); +} + + +template< typename Matrix > +void test_SetElement() +{ + using RealType = typename Matrix::RealType; + using DeviceType = typename Matrix::DeviceType; + using IndexType = typename Matrix::IndexType; + + /* + * Sets up the following 10x10 sparse matrix: + * + * / 1 0 2 0 3 0 4 0 0 0 \ + * | 5 6 7 0 0 0 0 0 0 0 | + * | 8 9 10 11 12 13 14 15 0 0 | + * | 16 17 0 0 0 0 0 0 0 0 | + * | 18 0 0 0 0 0 0 0 0 0 | + * | 19 0 0 0 0 0 0 0 0 0 | + * | 20 0 0 0 0 0 0 0 0 0 | + * | 21 0 0 0 0 0 0 0 0 0 | + * | 22 23 24 25 26 27 28 29 30 31 | + * \ 32 33 34 35 36 37 38 39 40 41 / + */ + + const IndexType rows = 10; + const IndexType cols = 10; + + Matrix m; + m.reset(); + + m.setDimensions( rows, cols ); + + typename Matrix::CompressedRowLengthsVector rowLengths { 4, 3, 8, 2, 1, 1, 1, 1, 10, 10 }; + m.setCompressedRowLengths( rowLengths ); + + RealType value = 1; + for( IndexType i = 0; i < 4; i++ ) + m.setElement( 0, 2 * i, value++ ); + + for( IndexType i = 0; i < 3; i++ ) + m.setElement( 1, i, value++ ); + + for( IndexType i = 0; i < 8; i++ ) + m.setElement( 2, i, value++ ); + + for( IndexType i = 0; i < 2; i++ ) + m.setElement( 3, i, value++ ); + + 
for( IndexType i = 4; i < 8; i++ ) + m.setElement( i, 0, value++ ); + + for( IndexType j = 8; j < rows; j++) + for( IndexType i = 0; i < cols; i++ ) + m.setElement( j, i, value++ ); + + EXPECT_EQ( m.getElement( 0, 0 ), 1 ); + EXPECT_EQ( m.getElement( 0, 1 ), 0 ); + EXPECT_EQ( m.getElement( 0, 2 ), 2 ); + EXPECT_EQ( m.getElement( 0, 3 ), 0 ); + EXPECT_EQ( m.getElement( 0, 4 ), 3 ); + EXPECT_EQ( m.getElement( 0, 5 ), 0 ); + EXPECT_EQ( m.getElement( 0, 6 ), 4 ); + EXPECT_EQ( m.getElement( 0, 7 ), 0 ); + EXPECT_EQ( m.getElement( 0, 8 ), 0 ); + EXPECT_EQ( m.getElement( 0, 9 ), 0 ); + + EXPECT_EQ( m.getElement( 1, 0 ), 5 ); + EXPECT_EQ( m.getElement( 1, 1 ), 6 ); + EXPECT_EQ( m.getElement( 1, 2 ), 7 ); + EXPECT_EQ( m.getElement( 1, 3 ), 0 ); + EXPECT_EQ( m.getElement( 1, 4 ), 0 ); + EXPECT_EQ( m.getElement( 1, 5 ), 0 ); + EXPECT_EQ( m.getElement( 1, 6 ), 0 ); + EXPECT_EQ( m.getElement( 1, 7 ), 0 ); + EXPECT_EQ( m.getElement( 1, 8 ), 0 ); + EXPECT_EQ( m.getElement( 1, 9 ), 0 ); + + EXPECT_EQ( m.getElement( 2, 0 ), 8 ); + EXPECT_EQ( m.getElement( 2, 1 ), 9 ); + EXPECT_EQ( m.getElement( 2, 2 ), 10 ); + EXPECT_EQ( m.getElement( 2, 3 ), 11 ); + EXPECT_EQ( m.getElement( 2, 4 ), 12 ); + EXPECT_EQ( m.getElement( 2, 5 ), 13 ); + EXPECT_EQ( m.getElement( 2, 6 ), 14 ); + EXPECT_EQ( m.getElement( 2, 7 ), 15 ); + EXPECT_EQ( m.getElement( 2, 8 ), 0 ); + EXPECT_EQ( m.getElement( 2, 9 ), 0 ); + + EXPECT_EQ( m.getElement( 3, 0 ), 16 ); + EXPECT_EQ( m.getElement( 3, 1 ), 17 ); + EXPECT_EQ( m.getElement( 3, 2 ), 0 ); + EXPECT_EQ( m.getElement( 3, 3 ), 0 ); + EXPECT_EQ( m.getElement( 3, 4 ), 0 ); + EXPECT_EQ( m.getElement( 3, 5 ), 0 ); + EXPECT_EQ( m.getElement( 3, 6 ), 0 ); + EXPECT_EQ( m.getElement( 3, 7 ), 0 ); + EXPECT_EQ( m.getElement( 3, 8 ), 0 ); + EXPECT_EQ( m.getElement( 3, 9 ), 0 ); + + EXPECT_EQ( m.getElement( 4, 0 ), 18 ); + EXPECT_EQ( m.getElement( 4, 1 ), 0 ); + EXPECT_EQ( m.getElement( 4, 2 ), 0 ); + EXPECT_EQ( m.getElement( 4, 3 ), 0 ); + EXPECT_EQ( m.getElement( 4, 4 ), 0 ); + EXPECT_EQ( m.getElement( 4, 5 ), 0 ); + EXPECT_EQ( m.getElement( 4, 6 ), 0 ); + EXPECT_EQ( m.getElement( 4, 7 ), 0 ); + EXPECT_EQ( m.getElement( 4, 8 ), 0 ); + EXPECT_EQ( m.getElement( 4, 9 ), 0 ); + + EXPECT_EQ( m.getElement( 5, 0 ), 19 ); + EXPECT_EQ( m.getElement( 5, 1 ), 0 ); + EXPECT_EQ( m.getElement( 5, 2 ), 0 ); + EXPECT_EQ( m.getElement( 5, 3 ), 0 ); + EXPECT_EQ( m.getElement( 5, 4 ), 0 ); + EXPECT_EQ( m.getElement( 5, 5 ), 0 ); + EXPECT_EQ( m.getElement( 5, 6 ), 0 ); + EXPECT_EQ( m.getElement( 5, 7 ), 0 ); + EXPECT_EQ( m.getElement( 5, 8 ), 0 ); + EXPECT_EQ( m.getElement( 5, 9 ), 0 ); + + EXPECT_EQ( m.getElement( 6, 0 ), 20 ); + EXPECT_EQ( m.getElement( 6, 1 ), 0 ); + EXPECT_EQ( m.getElement( 6, 2 ), 0 ); + EXPECT_EQ( m.getElement( 6, 3 ), 0 ); + EXPECT_EQ( m.getElement( 6, 4 ), 0 ); + EXPECT_EQ( m.getElement( 6, 5 ), 0 ); + EXPECT_EQ( m.getElement( 6, 6 ), 0 ); + EXPECT_EQ( m.getElement( 6, 7 ), 0 ); + EXPECT_EQ( m.getElement( 6, 8 ), 0 ); + EXPECT_EQ( m.getElement( 6, 9 ), 0 ); + + EXPECT_EQ( m.getElement( 7, 0 ), 21 ); + EXPECT_EQ( m.getElement( 7, 1 ), 0 ); + EXPECT_EQ( m.getElement( 7, 2 ), 0 ); + EXPECT_EQ( m.getElement( 7, 3 ), 0 ); + EXPECT_EQ( m.getElement( 7, 4 ), 0 ); + EXPECT_EQ( m.getElement( 7, 5 ), 0 ); + EXPECT_EQ( m.getElement( 7, 6 ), 0 ); + EXPECT_EQ( m.getElement( 7, 7 ), 0 ); + EXPECT_EQ( m.getElement( 7, 8 ), 0 ); + EXPECT_EQ( m.getElement( 7, 9 ), 0 ); + + EXPECT_EQ( m.getElement( 8, 0 ), 22 ); + EXPECT_EQ( m.getElement( 8, 1 ), 23 ); + EXPECT_EQ( m.getElement( 8, 2 ), 24 ); + EXPECT_EQ( 
m.getElement( 8, 3 ), 25 ); + EXPECT_EQ( m.getElement( 8, 4 ), 26 ); + EXPECT_EQ( m.getElement( 8, 5 ), 27 ); + EXPECT_EQ( m.getElement( 8, 6 ), 28 ); + EXPECT_EQ( m.getElement( 8, 7 ), 29 ); + EXPECT_EQ( m.getElement( 8, 8 ), 30 ); + EXPECT_EQ( m.getElement( 8, 9 ), 31 ); + + EXPECT_EQ( m.getElement( 9, 0 ), 32 ); + EXPECT_EQ( m.getElement( 9, 1 ), 33 ); + EXPECT_EQ( m.getElement( 9, 2 ), 34 ); + EXPECT_EQ( m.getElement( 9, 3 ), 35 ); + EXPECT_EQ( m.getElement( 9, 4 ), 36 ); + EXPECT_EQ( m.getElement( 9, 5 ), 37 ); + EXPECT_EQ( m.getElement( 9, 6 ), 38 ); + EXPECT_EQ( m.getElement( 9, 7 ), 39 ); + EXPECT_EQ( m.getElement( 9, 8 ), 40 ); + EXPECT_EQ( m.getElement( 9, 9 ), 41 ); +} + +template< typename Matrix > +void test_AddElement() +{ + using RealType = typename Matrix::RealType; + using DeviceType = typename Matrix::DeviceType; + using IndexType = typename Matrix::IndexType; + + /* + * Sets up the following 6x5 sparse matrix: + * + * / 1 2 3 0 0 \ + * | 0 4 5 6 0 | + * | 0 0 7 8 9 | + * | 10 1 1 0 0 | + * | 0 11 1 1 0 | + * \ 0 0 1 12 1 / + */ + + const IndexType rows = 6; + const IndexType cols = 5; + + Matrix m( rows, cols, { + { 0, 0, 1 }, { 0, 1, 2 }, { 0, 2, 3 }, + { 1, 1, 4 }, { 1, 2, 5 }, { 1, 3, 6 }, + { 2, 2, 7 }, { 2, 3, 8 }, { 2, 4, 9 }, + { 3, 0, 10 }, { 3, 1, 1 }, { 3, 2, 1 }, + { 4, 1, 11 }, { 4, 2, 1 }, { 4, 3, 1 }, + { 5, 2, 1 }, { 5, 3, 12 }, { 5, 4, 1 } } ); + /*typename Matrix::CompressedRowLengthsVector rowLengths( rows, 3 ); + m.setCompressedRowLengths( rowLengths ); + + RealType value = 1; + for( IndexType i = 0; i < cols - 2; i++ ) // 0th row + m.setElement( 0, i, value++ ); + + for( IndexType i = 1; i < cols - 1; i++ ) // 1st row + m.setElement( 1, i, value++ ); + + for( IndexType i = 2; i < cols; i++ ) // 2nd row + m.setElement( 2, i, value++ ); + + m.setElement( 3, 0, value++ ); // 3rd row + + m.setElement( 4, 1, value++ ); // 4th row + + m.setElement( 5, 3, value++ ); // 5th row*/ + + + // Check the set elements + EXPECT_EQ( m.getElement( 0, 0 ), 1 ); + EXPECT_EQ( m.getElement( 0, 1 ), 2 ); + EXPECT_EQ( m.getElement( 0, 2 ), 3 ); + EXPECT_EQ( m.getElement( 0, 3 ), 0 ); + EXPECT_EQ( m.getElement( 0, 4 ), 0 ); + + EXPECT_EQ( m.getElement( 1, 0 ), 0 ); + EXPECT_EQ( m.getElement( 1, 1 ), 4 ); + EXPECT_EQ( m.getElement( 1, 2 ), 5 ); + EXPECT_EQ( m.getElement( 1, 3 ), 6 ); + EXPECT_EQ( m.getElement( 1, 4 ), 0 ); + + EXPECT_EQ( m.getElement( 2, 0 ), 0 ); + EXPECT_EQ( m.getElement( 2, 1 ), 0 ); + EXPECT_EQ( m.getElement( 2, 2 ), 7 ); + EXPECT_EQ( m.getElement( 2, 3 ), 8 ); + EXPECT_EQ( m.getElement( 2, 4 ), 9 ); + + EXPECT_EQ( m.getElement( 3, 0 ), 10 ); + EXPECT_EQ( m.getElement( 3, 1 ), 1 ); + EXPECT_EQ( m.getElement( 3, 2 ), 1 ); + EXPECT_EQ( m.getElement( 3, 3 ), 0 ); + EXPECT_EQ( m.getElement( 3, 4 ), 0 ); + + EXPECT_EQ( m.getElement( 4, 0 ), 0 ); + EXPECT_EQ( m.getElement( 4, 1 ), 11 ); + EXPECT_EQ( m.getElement( 4, 2 ), 1 ); + EXPECT_EQ( m.getElement( 4, 3 ), 1 ); + EXPECT_EQ( m.getElement( 4, 4 ), 0 ); + + EXPECT_EQ( m.getElement( 5, 0 ), 0 ); + EXPECT_EQ( m.getElement( 5, 1 ), 0 ); + EXPECT_EQ( m.getElement( 5, 2 ), 1 ); + EXPECT_EQ( m.getElement( 5, 3 ), 12 ); + EXPECT_EQ( m.getElement( 5, 4 ), 1 ); + + // Add new elements to the old elements with a multiplying factor applied to the old elements. 
+ /* + * The following setup results in the following 6x5 sparse matrix: + * + * / 3 6 9 0 0 \ + * | 0 12 15 18 0 | + * | 0 0 21 24 27 | + * | 30 13 14 0 0 | + * | 0 35 16 17 0 | + * \ 0 0 18 41 20 / + */ + + RealType newValue = 1; + for( IndexType i = 0; i < cols - 2; i++ ) // 0th row + m.addElement( 0, i, newValue++, 2.0 ); + + for( IndexType i = 1; i < cols - 1; i++ ) // 1st row + m.addElement( 1, i, newValue++, 2.0 ); + + for( IndexType i = 2; i < cols; i++ ) // 2nd row + m.addElement( 2, i, newValue++, 2.0 ); + + for( IndexType i = 0; i < cols - 2; i++ ) // 3rd row + m.addElement( 3, i, newValue++, 2.0 ); + + for( IndexType i = 1; i < cols - 1; i++ ) // 4th row + m.addElement( 4, i, newValue++, 2.0 ); + + for( IndexType i = 2; i < cols; i++ ) // 5th row + m.addElement( 5, i, newValue++, 2.0 ); + + + EXPECT_EQ( m.getElement( 0, 0 ), 3 ); + EXPECT_EQ( m.getElement( 0, 1 ), 6 ); + EXPECT_EQ( m.getElement( 0, 2 ), 9 ); + EXPECT_EQ( m.getElement( 0, 3 ), 0 ); + EXPECT_EQ( m.getElement( 0, 4 ), 0 ); + + EXPECT_EQ( m.getElement( 1, 0 ), 0 ); + EXPECT_EQ( m.getElement( 1, 1 ), 12 ); + EXPECT_EQ( m.getElement( 1, 2 ), 15 ); + EXPECT_EQ( m.getElement( 1, 3 ), 18 ); + EXPECT_EQ( m.getElement( 1, 4 ), 0 ); + + EXPECT_EQ( m.getElement( 2, 0 ), 0 ); + EXPECT_EQ( m.getElement( 2, 1 ), 0 ); + EXPECT_EQ( m.getElement( 2, 2 ), 21 ); + EXPECT_EQ( m.getElement( 2, 3 ), 24 ); + EXPECT_EQ( m.getElement( 2, 4 ), 27 ); + + EXPECT_EQ( m.getElement( 3, 0 ), 30 ); + EXPECT_EQ( m.getElement( 3, 1 ), 13 ); + EXPECT_EQ( m.getElement( 3, 2 ), 14 ); + EXPECT_EQ( m.getElement( 3, 3 ), 0 ); + EXPECT_EQ( m.getElement( 3, 4 ), 0 ); + + EXPECT_EQ( m.getElement( 4, 0 ), 0 ); + EXPECT_EQ( m.getElement( 4, 1 ), 35 ); + EXPECT_EQ( m.getElement( 4, 2 ), 16 ); + EXPECT_EQ( m.getElement( 4, 3 ), 17 ); + EXPECT_EQ( m.getElement( 4, 4 ), 0 ); + + EXPECT_EQ( m.getElement( 5, 0 ), 0 ); + EXPECT_EQ( m.getElement( 5, 1 ), 0 ); + EXPECT_EQ( m.getElement( 5, 2 ), 18 ); + EXPECT_EQ( m.getElement( 5, 3 ), 41 ); + EXPECT_EQ( m.getElement( 5, 4 ), 20 ); +} + +template< typename Matrix > +void test_VectorProduct() +{ + using RealType = typename Matrix::RealType; + using DeviceType = typename Matrix::DeviceType; + using IndexType = typename Matrix::IndexType; + using VectorType = TNL::Containers::Vector< RealType, DeviceType, IndexType >; + + /* + * Sets up the following 4x4 sparse matrix: + * + * / 1 0 0 0 \ + * | 0 2 0 3 | + * | 0 4 0 0 | + * \ 0 0 5 0 / + */ + + const IndexType m_rows_1 = 4; + const IndexType m_cols_1 = 4; -#include "../main.h" + Matrix m_1; + m_1.reset(); + m_1.setDimensions( m_rows_1, m_cols_1 ); + typename Matrix::CompressedRowLengthsVector rowLengths_1{ 1, 2, 1, 1 }; + m_1.setCompressedRowLengths( rowLengths_1 ); + + RealType value_1 = 1; + m_1.setElement( 0, 0, value_1++ ); // 0th row + + m_1.setElement( 1, 1, value_1++ ); // 1st row + m_1.setElement( 1, 3, value_1++ ); + + m_1.setElement( 2, 1, value_1++ ); // 2nd row + + m_1.setElement( 3, 2, value_1++ ); // 3rd row + + VectorType inVector_1; + inVector_1.setSize( m_cols_1 ); + for( IndexType i = 0; i < inVector_1.getSize(); i++ ) + inVector_1.setElement( i, 2 ); + + VectorType outVector_1; + outVector_1.setSize( m_rows_1 ); + for( IndexType j = 0; j < outVector_1.getSize(); j++ ) + outVector_1.setElement( j, 0 ); + + m_1.vectorProduct( inVector_1, outVector_1 ); + + EXPECT_EQ( outVector_1.getElement( 0 ), 2 ); + EXPECT_EQ( outVector_1.getElement( 1 ), 10 ); + EXPECT_EQ( outVector_1.getElement( 2 ), 8 ); + EXPECT_EQ( outVector_1.getElement( 3 ), 10 ); + + /* + * 
Sets up the following 4x4 sparse matrix: + * + * / 1 2 3 0 \ + * | 0 0 0 4 | + * | 5 6 7 0 | + * \ 0 8 0 0 / + */ + + const IndexType m_rows_2 = 4; + const IndexType m_cols_2 = 4; + + Matrix m_2( m_rows_2, m_cols_2 ); + typename Matrix::CompressedRowLengthsVector rowLengths_2{ 3, 1, 3, 1 }; + m_2.setCompressedRowLengths( rowLengths_2 ); + + RealType value_2 = 1; + for( IndexType i = 0; i < 3; i++ ) // 0th row + m_2.setElement( 0, i, value_2++ ); + + m_2.setElement( 1, 3, value_2++ ); // 1st row + + for( IndexType i = 0; i < 3; i++ ) // 2nd row + m_2.setElement( 2, i, value_2++ ); + + for( IndexType i = 1; i < 2; i++ ) // 3rd row + m_2.setElement( 3, i, value_2++ ); + + VectorType inVector_2; + inVector_2.setSize( m_cols_2 ); + for( IndexType i = 0; i < inVector_2.getSize(); i++ ) + inVector_2.setElement( i, 2 ); + + VectorType outVector_2; + outVector_2.setSize( m_rows_2 ); + for( IndexType j = 0; j < outVector_2.getSize(); j++ ) + outVector_2.setElement( j, 0 ); + + m_2.vectorProduct( inVector_2, outVector_2 ); + + EXPECT_EQ( outVector_2.getElement( 0 ), 12 ); + EXPECT_EQ( outVector_2.getElement( 1 ), 8 ); + EXPECT_EQ( outVector_2.getElement( 2 ), 36 ); + EXPECT_EQ( outVector_2.getElement( 3 ), 16 ); + + /* + * Sets up the following 4x4 sparse matrix: + * + * / 1 2 3 0 \ + * | 0 4 5 6 | + * | 7 8 9 0 | + * \ 0 10 11 12 / + */ + + const IndexType m_rows_3 = 4; + const IndexType m_cols_3 = 4; + + Matrix m_3( m_rows_3, m_cols_3 ); + typename Matrix::CompressedRowLengthsVector rowLengths_3{ 3, 3, 3, 3 }; + m_3.setCompressedRowLengths( rowLengths_3 ); + + RealType value_3 = 1; + for( IndexType i = 0; i < 3; i++ ) // 0th row + m_3.setElement( 0, i, value_3++ ); + + for( IndexType i = 1; i < 4; i++ ) + m_3.setElement( 1, i, value_3++ ); // 1st row + + for( IndexType i = 0; i < 3; i++ ) // 2nd row + m_3.setElement( 2, i, value_3++ ); + + for( IndexType i = 1; i < 4; i++ ) // 3rd row + m_3.setElement( 3, i, value_3++ ); + + VectorType inVector_3; + inVector_3.setSize( m_cols_3 ); + for( IndexType i = 0; i < inVector_3.getSize(); i++ ) + inVector_3.setElement( i, 2 ); + + VectorType outVector_3; + outVector_3.setSize( m_rows_3 ); + for( IndexType j = 0; j < outVector_3.getSize(); j++ ) + outVector_3.setElement( j, 0 ); + + m_3.vectorProduct( inVector_3, outVector_3 ); + + EXPECT_EQ( outVector_3.getElement( 0 ), 12 ); + EXPECT_EQ( outVector_3.getElement( 1 ), 30 ); + EXPECT_EQ( outVector_3.getElement( 2 ), 48 ); + EXPECT_EQ( outVector_3.getElement( 3 ), 66 ); + + /* + * Sets up the following 8x8 sparse matrix: + * + * / 1 2 3 0 0 4 0 0 \ + * | 0 5 6 7 8 0 0 0 | + * | 9 10 11 12 13 0 0 0 | + * | 0 14 15 16 17 0 0 0 | + * | 0 0 18 19 20 21 0 0 | + * | 0 0 0 22 23 24 25 0 | + * | 26 27 28 29 30 0 0 0 | + * \ 31 32 33 34 35 0 0 0 / + */ + + const IndexType m_rows_4 = 8; + const IndexType m_cols_4 = 8; + + Matrix m_4( m_rows_4, m_cols_4 ); + typename Matrix::CompressedRowLengthsVector rowLengths_4{ 4, 4, 5, 4, 4, 4, 5, 5 }; + m_4.setCompressedRowLengths( rowLengths_4 ); + + RealType value_4 = 1; + for( IndexType i = 0; i < 3; i++ ) // 0th row + m_4.setElement( 0, i, value_4++ ); + + m_4.setElement( 0, 5, value_4++ ); + + for( IndexType i = 1; i < 5; i++ ) // 1st row + m_4.setElement( 1, i, value_4++ ); + + for( IndexType i = 0; i < 5; i++ ) // 2nd row + m_4.setElement( 2, i, value_4++ ); + + for( IndexType i = 1; i < 5; i++ ) // 3rd row + m_4.setElement( 3, i, value_4++ ); + + for( IndexType i = 2; i < 6; i++ ) // 4th row + m_4.setElement( 4, i, value_4++ ); + + for( IndexType i = 3; i < 7; i++ ) // 5th 
row + m_4.setElement( 5, i, value_4++ ); + + for( IndexType i = 0; i < 5; i++ ) // 6th row + m_4.setElement( 6, i, value_4++ ); + + for( IndexType i = 0; i < 5; i++ ) // 7th row + m_4.setElement( 7, i, value_4++ ); + + VectorType inVector_4; + inVector_4.setSize( m_cols_4 ); + for( IndexType i = 0; i < inVector_4.getSize(); i++ ) + inVector_4.setElement( i, 2 ); + + VectorType outVector_4; + outVector_4.setSize( m_rows_4 ); + for( IndexType j = 0; j < outVector_4.getSize(); j++ ) + outVector_4.setElement( j, 0 ); + + m_4.vectorProduct( inVector_4, outVector_4 ); + + EXPECT_EQ( outVector_4.getElement( 0 ), 20 ); + EXPECT_EQ( outVector_4.getElement( 1 ), 52 ); + EXPECT_EQ( outVector_4.getElement( 2 ), 110 ); + EXPECT_EQ( outVector_4.getElement( 3 ), 124 ); + EXPECT_EQ( outVector_4.getElement( 4 ), 156 ); + EXPECT_EQ( outVector_4.getElement( 5 ), 188 ); + EXPECT_EQ( outVector_4.getElement( 6 ), 280 ); + EXPECT_EQ( outVector_4.getElement( 7 ), 330 ); + + + /* + * Sets up the following 8x8 sparse matrix: + * + * / 1 2 3 0 4 5 0 1 \ 6 + * | 0 6 0 7 0 0 0 1 | 3 + * | 0 8 9 0 10 0 0 1 | 4 + * | 0 11 12 13 14 0 0 1 | 5 + * | 0 15 0 0 0 0 0 1 | 2 + * | 0 16 17 18 19 20 21 1 | 7 + * | 22 23 24 25 26 27 28 1 | 8 + * \ 29 30 31 32 33 34 35 36 / 8 + */ + + const IndexType m_rows_5 = 8; + const IndexType m_cols_5 = 8; + + Matrix m_5( m_rows_5, m_cols_5 ); + typename Matrix::CompressedRowLengthsVector rowLengths_5{ 6, 3, 4, 5, 2, 7, 8, 8 }; + m_5.setCompressedRowLengths( rowLengths_5 ); + + RealType value_5 = 1; + for( IndexType i = 0; i < 3; i++ ) // 0th row + m_5.setElement( 0, i, value_5++ ); + + m_5.setElement( 0, 4, value_5++ ); // 0th row + m_5.setElement( 0, 5, value_5++ ); + + m_5.setElement( 1, 1, value_5++ ); // 1st row + m_5.setElement( 1, 3, value_5++ ); + + for( IndexType i = 1; i < 3; i++ ) // 2nd row + m_5.setElement( 2, i, value_5++ ); + + m_5.setElement( 2, 4, value_5++ ); // 2nd row + + for( IndexType i = 1; i < 5; i++ ) // 3rd row + m_5.setElement( 3, i, value_5++ ); + + m_5.setElement( 4, 1, value_5++ ); // 4th row + + for( IndexType i = 1; i < 7; i++ ) // 5th row + m_5.setElement( 5, i, value_5++ ); + + for( IndexType i = 0; i < 7; i++ ) // 6th row + m_5.setElement( 6, i, value_5++ ); + + for( IndexType i = 0; i < 8; i++ ) // 7th row + m_5.setElement( 7, i, value_5++ ); + + for( IndexType i = 0; i < 7; i++ ) // 1s at the end of rows + m_5.setElement( i, 7, 1); + + VectorType inVector_5; + inVector_5.setSize( m_cols_5 ); + for( IndexType i = 0; i < inVector_5.getSize(); i++ ) + inVector_5.setElement( i, 2 ); + + VectorType outVector_5; + outVector_5.setSize( m_rows_5 ); + for( IndexType j = 0; j < outVector_5.getSize(); j++ ) + outVector_5.setElement( j, 0 ); + + m_5.vectorProduct( inVector_5, outVector_5 ); + + EXPECT_EQ( outVector_5.getElement( 0 ), 32 ); + EXPECT_EQ( outVector_5.getElement( 1 ), 28 ); + EXPECT_EQ( outVector_5.getElement( 2 ), 56 ); + EXPECT_EQ( outVector_5.getElement( 3 ), 102 ); + EXPECT_EQ( outVector_5.getElement( 4 ), 32 ); + EXPECT_EQ( outVector_5.getElement( 5 ), 224 ); + EXPECT_EQ( outVector_5.getElement( 6 ), 352 ); + EXPECT_EQ( outVector_5.getElement( 7 ), 520 ); +} + +template< typename Matrix > +void test_RowsReduction() +{ + using RealType = typename Matrix::RealType; + using DeviceType = typename Matrix::DeviceType; + using IndexType = typename Matrix::IndexType; + + /* + * Sets up the following 8x8 sparse matrix: + * + * / 1 2 3 0 4 5 0 1 \ 6 + * | 0 6 0 7 0 0 0 1 | 3 + * | 0 8 9 0 10 0 0 1 | 4 + * | 0 11 12 13 14 0 0 1 | 5 + * | 0 15 0 0 0 0 0 1 | 2 + 
* | 0 16 17 18 19 20 21 1 | 7 + * | 22 23 24 25 26 27 28 1 | 8 + * \ 29 30 31 32 33 34 35 36 / 8 + */ + + const IndexType rows = 8; + const IndexType cols = 8; + + Matrix m; + m.setDimensions( rows, cols ); + typename Matrix::RowsCapacitiesType rowsCapacities{ 6, 3, 4, 5, 2, 7, 8, 8 }; + m.setCompressedRowLengths( rowsCapacities ); + + RealType value = 1; + for( IndexType i = 0; i < 3; i++ ) // 0th row + m.setElement( 0, i, value++ ); + + m.setElement( 0, 4, value++ ); // 0th row + m.setElement( 0, 5, value++ ); + + m.setElement( 1, 1, value++ ); // 1st row + m.setElement( 1, 3, value++ ); + + for( IndexType i = 1; i < 3; i++ ) // 2nd row + m.setElement( 2, i, value++ ); + + m.setElement( 2, 4, value++ ); // 2nd row + + for( IndexType i = 1; i < 5; i++ ) // 3rd row + m.setElement( 3, i, value++ ); + + m.setElement( 4, 1, value++ ); // 4th row + + for( IndexType i = 1; i < 7; i++ ) // 5th row + m.setElement( 5, i, value++ ); + + for( IndexType i = 0; i < 7; i++ ) // 6th row + m.setElement( 6, i, value++ ); + + for( IndexType i = 0; i < 8; i++ ) // 7th row + m.setElement( 7, i, value++ ); + + for( IndexType i = 0; i < 7; i++ ) // 1s at the end of rows + m.setElement( i, 7, 1); + + //// + // Compute number of non-zero elements in rows. + typename Matrix::RowsCapacitiesType rowLengths( rows ); + auto rowLengths_view = rowLengths.getView(); + auto fetch = [] __cuda_callable__ ( IndexType row, IndexType column, IndexType globalIdx, const RealType& value ) -> IndexType { + return ( value != 0.0 ); + }; + auto reduce = [] __cuda_callable__ ( IndexType& aux, const IndexType a ) { + aux += a; + }; + auto keep = [=] __cuda_callable__ ( const IndexType rowIdx, const IndexType value ) mutable { + rowLengths_view[ rowIdx ] = value; + }; + m.allRowsReduction( fetch, reduce, keep, 0 ); + EXPECT_EQ( rowsCapacities, rowLengths ); + m.getCompressedRowLengths( rowLengths ); + EXPECT_EQ( rowsCapacities, rowLengths ); + + //// + // Compute max norm + TNL::Containers::Vector< RealType, DeviceType, IndexType > rowSums( rows ); + auto rowSums_view = rowSums.getView(); + auto max_fetch = [] __cuda_callable__ ( IndexType row, IndexType column, IndexType globalIdx, const RealType& value ) -> IndexType { + return abs( value ); + }; + auto max_reduce = [] __cuda_callable__ ( IndexType& aux, const IndexType a ) { + aux += a; + }; + auto max_keep = [=] __cuda_callable__ ( const IndexType rowIdx, const IndexType value ) mutable { + rowSums_view[ rowIdx ] = value; + }; + m.allRowsReduction( max_fetch, max_reduce, max_keep, 0 ); + const RealType maxNorm = TNL::max( rowSums ); + EXPECT_EQ( maxNorm, 260 ) ; // 29+30+31+32+33+34+35+36 +} + +template< typename Matrix > +void test_PerformSORIteration() +{ + using RealType = typename Matrix::RealType; + using DeviceType = typename Matrix::DeviceType; + using IndexType = typename Matrix::IndexType; + + /* + * Sets up the following 4x4 sparse matrix: + * + * / 4 1 0 0 \ + * | 1 4 1 0 | + * | 0 1 4 1 | + * \ 0 0 1 4 / + */ + + const IndexType m_rows = 4; + const IndexType m_cols = 4; + + Matrix m( m_rows, m_cols ); + typename Matrix::CompressedRowLengthsVector rowLengths( m_rows, 3 ); + m.setCompressedRowLengths( rowLengths ); + + m.setElement( 0, 0, 4.0 ); // 0th row + m.setElement( 0, 1, 1.0); + + m.setElement( 1, 0, 1.0 ); // 1st row + m.setElement( 1, 1, 4.0 ); + m.setElement( 1, 2, 1.0 ); + + m.setElement( 2, 1, 1.0 ); // 2nd row + m.setElement( 2, 2, 4.0 ); + m.setElement( 2, 3, 1.0 ); + + m.setElement( 3, 2, 1.0 ); // 3rd row + m.setElement( 3, 3, 4.0 ); + + RealType bVector 
[ 4 ] = { 1, 1, 1, 1 }; + RealType xVector [ 4 ] = { 1, 1, 1, 1 }; + + IndexType row = 0; + RealType omega = 1; + + m.performSORIteration( bVector, row++, xVector, omega); + + EXPECT_EQ( xVector[ 0 ], 0.0 ); + EXPECT_EQ( xVector[ 1 ], 1.0 ); + EXPECT_EQ( xVector[ 2 ], 1.0 ); + EXPECT_EQ( xVector[ 3 ], 1.0 ); + + m.performSORIteration( bVector, row++, xVector, omega); + + EXPECT_EQ( xVector[ 0 ], 0.0 ); + EXPECT_EQ( xVector[ 1 ], 0.0 ); + EXPECT_EQ( xVector[ 2 ], 1.0 ); + EXPECT_EQ( xVector[ 3 ], 1.0 ); + + m.performSORIteration( bVector, row++, xVector, omega); + + EXPECT_EQ( xVector[ 0 ], 0.0 ); + EXPECT_EQ( xVector[ 1 ], 0.0 ); + EXPECT_EQ( xVector[ 2 ], 0.0 ); + EXPECT_EQ( xVector[ 3 ], 1.0 ); + + m.performSORIteration( bVector, row++, xVector, omega); + + EXPECT_EQ( xVector[ 0 ], 0.0 ); + EXPECT_EQ( xVector[ 1 ], 0.0 ); + EXPECT_EQ( xVector[ 2 ], 0.0 ); + EXPECT_EQ( xVector[ 3 ], 0.25 ); +} + +template< typename Matrix > +void test_SaveAndLoad( const char* filename ) +{ + using RealType = typename Matrix::RealType; + using DeviceType = typename Matrix::DeviceType; + using IndexType = typename Matrix::IndexType; + + /* + * Sets up the following 4x4 sparse matrix: + * + * / 1 2 3 0 \ + * | 0 4 0 5 | + * | 6 7 8 0 | + * \ 0 9 10 11 / + */ + + const IndexType m_rows = 4; + const IndexType m_cols = 4; + + Matrix savedMatrix( m_rows, m_cols ); + typename Matrix::CompressedRowLengthsVector rowLengths( m_rows, 3 ); + savedMatrix.setCompressedRowLengths( rowLengths ); + + RealType value = 1; + for( IndexType i = 0; i < m_cols - 1; i++ ) // 0th row + savedMatrix.setElement( 0, i, value++ ); + + savedMatrix.setElement( 1, 1, value++ ); + savedMatrix.setElement( 1, 3, value++ ); // 1st row + + for( IndexType i = 0; i < m_cols - 1; i++ ) // 2nd row + savedMatrix.setElement( 2, i, value++ ); + + for( IndexType i = 1; i < m_cols; i++ ) // 3rd row + savedMatrix.setElement( 3, i, value++ ); + + ASSERT_NO_THROW( savedMatrix.save( filename ) ); + + Matrix loadedMatrix; + + ASSERT_NO_THROW( loadedMatrix.load( filename ) ); + + EXPECT_EQ( savedMatrix.getElement( 0, 0 ), loadedMatrix.getElement( 0, 0 ) ); + EXPECT_EQ( savedMatrix.getElement( 0, 1 ), loadedMatrix.getElement( 0, 1 ) ); + EXPECT_EQ( savedMatrix.getElement( 0, 2 ), loadedMatrix.getElement( 0, 2 ) ); + EXPECT_EQ( savedMatrix.getElement( 0, 3 ), loadedMatrix.getElement( 0, 3 ) ); + + EXPECT_EQ( savedMatrix.getElement( 1, 0 ), loadedMatrix.getElement( 1, 0 ) ); + EXPECT_EQ( savedMatrix.getElement( 1, 1 ), loadedMatrix.getElement( 1, 1 ) ); + EXPECT_EQ( savedMatrix.getElement( 1, 2 ), loadedMatrix.getElement( 1, 2 ) ); + EXPECT_EQ( savedMatrix.getElement( 1, 3 ), loadedMatrix.getElement( 1, 3 ) ); + + EXPECT_EQ( savedMatrix.getElement( 2, 0 ), loadedMatrix.getElement( 2, 0 ) ); + EXPECT_EQ( savedMatrix.getElement( 2, 1 ), loadedMatrix.getElement( 2, 1 ) ); + EXPECT_EQ( savedMatrix.getElement( 2, 2 ), loadedMatrix.getElement( 2, 2 ) ); + EXPECT_EQ( savedMatrix.getElement( 2, 3 ), loadedMatrix.getElement( 2, 3 ) ); + + EXPECT_EQ( savedMatrix.getElement( 3, 0 ), loadedMatrix.getElement( 3, 0 ) ); + EXPECT_EQ( savedMatrix.getElement( 3, 1 ), loadedMatrix.getElement( 3, 1 ) ); + EXPECT_EQ( savedMatrix.getElement( 3, 2 ), loadedMatrix.getElement( 3, 2 ) ); + EXPECT_EQ( savedMatrix.getElement( 3, 3 ), loadedMatrix.getElement( 3, 3 ) ); + + EXPECT_EQ( savedMatrix.getElement( 0, 0 ), 1 ); + EXPECT_EQ( savedMatrix.getElement( 0, 1 ), 2 ); + EXPECT_EQ( savedMatrix.getElement( 0, 2 ), 3 ); + EXPECT_EQ( savedMatrix.getElement( 0, 3 ), 0 ); + + EXPECT_EQ( 
savedMatrix.getElement( 1, 0 ), 0 ); + EXPECT_EQ( savedMatrix.getElement( 1, 1 ), 4 ); + EXPECT_EQ( savedMatrix.getElement( 1, 2 ), 0 ); + EXPECT_EQ( savedMatrix.getElement( 1, 3 ), 5 ); + + EXPECT_EQ( savedMatrix.getElement( 2, 0 ), 6 ); + EXPECT_EQ( savedMatrix.getElement( 2, 1 ), 7 ); + EXPECT_EQ( savedMatrix.getElement( 2, 2 ), 8 ); + EXPECT_EQ( savedMatrix.getElement( 2, 3 ), 0 ); + + EXPECT_EQ( savedMatrix.getElement( 3, 0 ), 0 ); + EXPECT_EQ( savedMatrix.getElement( 3, 1 ), 9 ); + EXPECT_EQ( savedMatrix.getElement( 3, 2 ), 10 ); + EXPECT_EQ( savedMatrix.getElement( 3, 3 ), 11 ); + + EXPECT_EQ( std::remove( filename ), 0 ); +} + +template< typename Matrix > +void test_Print() +{ + using RealType = typename Matrix::RealType; + using DeviceType = typename Matrix::DeviceType; + using IndexType = typename Matrix::IndexType; + + /* + * Sets up the following 5x4 sparse matrix: + * + * / 1 2 3 0 \ + * | 0 0 0 4 | + * | 5 6 7 0 | + * | 0 8 9 10 | + * \ 0 0 11 12 / + */ + + const IndexType m_rows = 5; + const IndexType m_cols = 4; + + Matrix m( m_rows, m_cols ); + typename Matrix::CompressedRowLengthsVector rowLengths( m_rows, 3 ); + m.setCompressedRowLengths( rowLengths ); + + RealType value = 1; + for( IndexType i = 0; i < m_cols - 1; i++ ) // 0th row + m.setElement( 0, i, value++ ); + + m.setElement( 1, 3, value++ ); // 1st row + + for( IndexType i = 0; i < m_cols - 1; i++ ) // 2nd row + m.setElement( 2, i, value++ ); + + for( IndexType i = 1; i < m_cols; i++ ) // 3rd row + m.setElement( 3, i, value++ ); + + for( IndexType i = 2; i < m_cols; i++ ) // 4th row + m.setElement( 4, i, value++ ); + + std::stringstream printed; + std::stringstream couted; + + //change the underlying buffer and save the old buffer + auto old_buf = std::cout.rdbuf(printed.rdbuf()); + + m.print( std::cout ); //all the std::cout goes to ss + + std::cout.rdbuf(old_buf); //reset + + couted << "Row: 0 -> Col:0->1 Col:1->2 Col:2->3\t\n" + "Row: 1 -> Col:3->4\t\n" + "Row: 2 -> Col:0->5 Col:1->6 Col:2->7\t\n" + "Row: 3 -> Col:1->8 Col:2->9 Col:3->10\t\n" + "Row: 4 -> Col:2->11 Col:3->12\t\n"; + + EXPECT_EQ( printed.str(), couted.str() ); +} + +#endif diff --git a/src/UnitTests/Matrices/SparseMatrixTest_CSR.cpp b/src/UnitTests/Matrices/SparseMatrixTest_CSR.cpp index 258ad2c53831010111eeec9dc240368ae5dffb35..5830658abd3135064cc46c8a7c534252a0421935 100644 --- a/src/UnitTests/Matrices/SparseMatrixTest_CSR.cpp +++ b/src/UnitTests/Matrices/SparseMatrixTest_CSR.cpp @@ -1 +1,11 @@ +/*************************************************************************** + SparseMatrixTest_CSR.cpp - description + ------------------- + begin : Dec 3, 2019 + copyright : (C) 2019 by Tomas Oberhuber et al. + email : tomas.oberhuber@fjfi.cvut.cz + ***************************************************************************/ + +/* See Copyright Notice in tnl/Copyright */ + #include "SparseMatrixTest_CSR.h" diff --git a/src/UnitTests/Matrices/SparseMatrixTest_CSR.cu b/src/UnitTests/Matrices/SparseMatrixTest_CSR.cu index 258ad2c53831010111eeec9dc240368ae5dffb35..91f0de81a928a6f5676b2d839a35496dfdae61c3 100644 --- a/src/UnitTests/Matrices/SparseMatrixTest_CSR.cu +++ b/src/UnitTests/Matrices/SparseMatrixTest_CSR.cu @@ -1 +1,11 @@ +/*************************************************************************** + SparseMatrixTest_CSR.cu - description + ------------------- + begin : Dec 3, 2019 + copyright : (C) 2019 by Tomas Oberhuber et al. 
+ email : tomas.oberhuber@fjfi.cvut.cz + ***************************************************************************/ + +/* See Copyright Notice in tnl/Copyright */ + #include "SparseMatrixTest_CSR.h" diff --git a/src/UnitTests/Matrices/SparseMatrixTest_CSR.h b/src/UnitTests/Matrices/SparseMatrixTest_CSR.h index 3530db46c18753102a09b15908fcc5d34fa66026..a72d548f5bdc98c6fbd7920507b4c1978f58ef00 100644 --- a/src/UnitTests/Matrices/SparseMatrixTest_CSR.h +++ b/src/UnitTests/Matrices/SparseMatrixTest_CSR.h @@ -1,16 +1,18 @@ /*************************************************************************** SparseMatrixTest_CSR.h - description ------------------- - begin : Nov 2, 2018 - copyright : (C) 2018 by Tomas Oberhuber et al. + begin : Dec 2, 2019 + copyright : (C) 2019 by Tomas Oberhuber et al. email : tomas.oberhuber@fjfi.cvut.cz ***************************************************************************/ /* See Copyright Notice in tnl/Copyright */ -#include <TNL/Matrices/CSR.h> +#include <TNL/Containers/Segments/CSR.h> +#include <TNL/Matrices/SparseMatrix.h> -#include "SparseMatrixTest.hpp" + +#include "SparseMatrixTest.h" #include <iostream> #ifdef HAVE_GTEST @@ -27,36 +29,35 @@ protected: // types for which MatrixTest is instantiated using CSRMatrixTypes = ::testing::Types < - TNL::Matrices::CSR< int, TNL::Devices::Host, short >, - TNL::Matrices::CSR< long, TNL::Devices::Host, short >, - TNL::Matrices::CSR< float, TNL::Devices::Host, short >, - TNL::Matrices::CSR< double, TNL::Devices::Host, short >, - TNL::Matrices::CSR< int, TNL::Devices::Host, int >, - TNL::Matrices::CSR< long, TNL::Devices::Host, int >, - TNL::Matrices::CSR< float, TNL::Devices::Host, int >, - TNL::Matrices::CSR< double, TNL::Devices::Host, int >, - TNL::Matrices::CSR< int, TNL::Devices::Host, long >, - TNL::Matrices::CSR< long, TNL::Devices::Host, long >, - TNL::Matrices::CSR< float, TNL::Devices::Host, long >, - TNL::Matrices::CSR< double, TNL::Devices::Host, long > + TNL::Matrices::SparseMatrix< int, TNL::Devices::Host, int, TNL::Matrices::GeneralMatrix, TNL::Containers::Segments::CSR >, + TNL::Matrices::SparseMatrix< long, TNL::Devices::Host, int, TNL::Matrices::GeneralMatrix, TNL::Containers::Segments::CSR >, + TNL::Matrices::SparseMatrix< float, TNL::Devices::Host, int, TNL::Matrices::GeneralMatrix, TNL::Containers::Segments::CSR >, + TNL::Matrices::SparseMatrix< double, TNL::Devices::Host, int, TNL::Matrices::GeneralMatrix, TNL::Containers::Segments::CSR >, + TNL::Matrices::SparseMatrix< int, TNL::Devices::Host, long, TNL::Matrices::GeneralMatrix, TNL::Containers::Segments::CSR >, + TNL::Matrices::SparseMatrix< long, TNL::Devices::Host, long, TNL::Matrices::GeneralMatrix, TNL::Containers::Segments::CSR >, + TNL::Matrices::SparseMatrix< float, TNL::Devices::Host, long, TNL::Matrices::GeneralMatrix, TNL::Containers::Segments::CSR >, + TNL::Matrices::SparseMatrix< double, TNL::Devices::Host, long, TNL::Matrices::GeneralMatrix, TNL::Containers::Segments::CSR > #ifdef HAVE_CUDA - ,TNL::Matrices::CSR< int, TNL::Devices::Cuda, short >, - TNL::Matrices::CSR< long, TNL::Devices::Cuda, short >, - TNL::Matrices::CSR< float, TNL::Devices::Cuda, short >, - TNL::Matrices::CSR< double, TNL::Devices::Cuda, short >, - TNL::Matrices::CSR< int, TNL::Devices::Cuda, int >, - TNL::Matrices::CSR< long, TNL::Devices::Cuda, int >, - TNL::Matrices::CSR< float, TNL::Devices::Cuda, int >, - TNL::Matrices::CSR< double, TNL::Devices::Cuda, int >, - TNL::Matrices::CSR< int, TNL::Devices::Cuda, long >, - TNL::Matrices::CSR< long, 
TNL::Devices::Cuda, long >, - TNL::Matrices::CSR< float, TNL::Devices::Cuda, long >, - TNL::Matrices::CSR< double, TNL::Devices::Cuda, long > + ,TNL::Matrices::SparseMatrix< int, TNL::Devices::Cuda, int, TNL::Matrices::GeneralMatrix, TNL::Containers::Segments::CSR >, + TNL::Matrices::SparseMatrix< long, TNL::Devices::Cuda, int, TNL::Matrices::GeneralMatrix, TNL::Containers::Segments::CSR >, + TNL::Matrices::SparseMatrix< float, TNL::Devices::Cuda, int, TNL::Matrices::GeneralMatrix, TNL::Containers::Segments::CSR >, + TNL::Matrices::SparseMatrix< double, TNL::Devices::Cuda, int, TNL::Matrices::GeneralMatrix, TNL::Containers::Segments::CSR >, + TNL::Matrices::SparseMatrix< int, TNL::Devices::Cuda, long, TNL::Matrices::GeneralMatrix, TNL::Containers::Segments::CSR >, + TNL::Matrices::SparseMatrix< long, TNL::Devices::Cuda, long, TNL::Matrices::GeneralMatrix, TNL::Containers::Segments::CSR >, + TNL::Matrices::SparseMatrix< float, TNL::Devices::Cuda, long, TNL::Matrices::GeneralMatrix, TNL::Containers::Segments::CSR >, + TNL::Matrices::SparseMatrix< double, TNL::Devices::Cuda, long, TNL::Matrices::GeneralMatrix, TNL::Containers::Segments::CSR > #endif >; TYPED_TEST_SUITE( CSRMatrixTest, CSRMatrixTypes); +TYPED_TEST( CSRMatrixTest, Constructors ) +{ + using CSRMatrixType = typename TestFixture::CSRMatrixType; + + test_Constructors< CSRMatrixType >(); +} + TYPED_TEST( CSRMatrixTest, setDimensionsTest ) { using CSRMatrixType = typename TestFixture::CSRMatrixType; @@ -64,19 +65,12 @@ TYPED_TEST( CSRMatrixTest, setDimensionsTest ) test_SetDimensions< CSRMatrixType >(); } -//TYPED_TEST( CSRMatrixTest, setCompressedRowLengthsTest ) -//{ -//// using CSRMatrixType = typename TestFixture::CSRMatrixType; -// -//// test_SetCompressedRowLengths< CSRMatrixType >(); -// -// bool testRan = false; -// EXPECT_TRUE( testRan ); -// std::cout << "\nTEST DID NOT RUN. NOT WORKING.\n\n"; -// std::cout << " This test is dependent on the input format. 
\n"; -// std::cout << " Almost every format allocates elements per row differently.\n\n"; -// std::cout << "\n TODO: Finish implementation of getNonZeroRowLength (Only non-zero elements, not the number of allocated elements.)\n\n"; -//} +TYPED_TEST( CSRMatrixTest, setCompressedRowLengthsTest ) +{ + using CSRMatrixType = typename TestFixture::CSRMatrixType; + + test_SetCompressedRowLengths< CSRMatrixType >(); +} TYPED_TEST( CSRMatrixTest, setLikeTest ) { @@ -92,6 +86,14 @@ TYPED_TEST( CSRMatrixTest, resetTest ) test_Reset< CSRMatrixType >(); } +TYPED_TEST( CSRMatrixTest, getRowTest ) +{ + using CSRMatrixType = typename TestFixture::CSRMatrixType; + + test_GetRow< CSRMatrixType >(); +} + + TYPED_TEST( CSRMatrixTest, setElementTest ) { using CSRMatrixType = typename TestFixture::CSRMatrixType; @@ -106,25 +108,25 @@ TYPED_TEST( CSRMatrixTest, addElementTest ) test_AddElement< CSRMatrixType >(); } -TYPED_TEST( CSRMatrixTest, setRowTest ) +TYPED_TEST( CSRMatrixTest, vectorProductTest ) { using CSRMatrixType = typename TestFixture::CSRMatrixType; - test_SetRow< CSRMatrixType >(); + test_VectorProduct< CSRMatrixType >(); } -TYPED_TEST( CSRMatrixTest, vectorProductTest ) +TYPED_TEST( CSRMatrixTest, rowsReduction ) { using CSRMatrixType = typename TestFixture::CSRMatrixType; - test_VectorProduct< CSRMatrixType >(); + test_RowsReduction< CSRMatrixType >(); } TYPED_TEST( CSRMatrixTest, saveAndLoadTest ) { using CSRMatrixType = typename TestFixture::CSRMatrixType; - test_SaveAndLoad< CSRMatrixType >( "test_SparseMatrixTest_CSR" ); + test_SaveAndLoad< CSRMatrixType >( "test_SparseMatrixTest_CSR_segments" ); } TYPED_TEST( CSRMatrixTest, printTest ) diff --git a/src/UnitTests/Matrices/SparseMatrixTest_Ellpack.cpp b/src/UnitTests/Matrices/SparseMatrixTest_Ellpack.cpp index c454706f0b1d437b798f2d7a1e93ccf4c0291d3f..3c30c54c5e8fd6fe5213367e410fd039b4edabb3 100644 --- a/src/UnitTests/Matrices/SparseMatrixTest_Ellpack.cpp +++ b/src/UnitTests/Matrices/SparseMatrixTest_Ellpack.cpp @@ -1 +1,11 @@ +/*************************************************************************** + SparseMatrixTest_Ellpack.cpp - description + ------------------- + begin : Dec 3, 2019 + copyright : (C) 2019 by Tomas Oberhuber et al. + email : tomas.oberhuber@fjfi.cvut.cz + ***************************************************************************/ + +/* See Copyright Notice in tnl/Copyright */ + #include "SparseMatrixTest_Ellpack.h" diff --git a/src/UnitTests/Matrices/SparseMatrixTest_Ellpack.cu b/src/UnitTests/Matrices/SparseMatrixTest_Ellpack.cu index c454706f0b1d437b798f2d7a1e93ccf4c0291d3f..9a27cece6e440e42061781c9529660dfac80eacc 100644 --- a/src/UnitTests/Matrices/SparseMatrixTest_Ellpack.cu +++ b/src/UnitTests/Matrices/SparseMatrixTest_Ellpack.cu @@ -1 +1,11 @@ +/*************************************************************************** + SparseMatrixTest_Ellpack.cu - description + ------------------- + begin : Dec 3, 2019 + copyright : (C) 2019 by Tomas Oberhuber et al. 
+ email : tomas.oberhuber@fjfi.cvut.cz + ***************************************************************************/ + +/* See Copyright Notice in tnl/Copyright */ + #include "SparseMatrixTest_Ellpack.h" diff --git a/src/UnitTests/Matrices/SparseMatrixTest_Ellpack.h b/src/UnitTests/Matrices/SparseMatrixTest_Ellpack.h index 979068e02ea2d5b4ed5c3dc4f4db2a566c027934..2a890e694f4ca90edc7aa3b98fba56f666c2097d 100644 --- a/src/UnitTests/Matrices/SparseMatrixTest_Ellpack.h +++ b/src/UnitTests/Matrices/SparseMatrixTest_Ellpack.h @@ -1,16 +1,18 @@ /*************************************************************************** SparseMatrixTest_Ellpack.h - description ------------------- - begin : Nov 2, 2018 - copyright : (C) 2018 by Tomas Oberhuber et al. + begin : Dec 3, 2019 + copyright : (C) 2019 by Tomas Oberhuber et al. email : tomas.oberhuber@fjfi.cvut.cz ***************************************************************************/ /* See Copyright Notice in tnl/Copyright */ -#include <TNL/Matrices/Ellpack.h> +#include <TNL/Containers/Segments/Ellpack.h> +#include <TNL/Matrices/SparseMatrix.h> -#include "SparseMatrixTest.hpp" + +#include "SparseMatrixTest.h" #include <iostream> #ifdef HAVE_GTEST @@ -24,38 +26,48 @@ protected: using EllpackMatrixType = Matrix; }; +//// +// Row-major format is used for the host system +template< typename Device, typename Index, typename IndexAlocator > +using RowMajorEllpack = TNL::Containers::Segments::Ellpack< Device, Index, IndexAlocator, true, 32 >; + + +//// +// Column-major format is used for GPUs +template< typename Device, typename Index, typename IndexAllocator > +using ColumnMajorEllpack = TNL::Containers::Segments::Ellpack< Device, Index, IndexAllocator, false, 32 >; + // types for which MatrixTest is instantiated using EllpackMatrixTypes = ::testing::Types < - TNL::Matrices::Ellpack< int, TNL::Devices::Host, short >, - TNL::Matrices::Ellpack< long, TNL::Devices::Host, short >, - TNL::Matrices::Ellpack< float, TNL::Devices::Host, short >, - TNL::Matrices::Ellpack< double, TNL::Devices::Host, short >, - TNL::Matrices::Ellpack< int, TNL::Devices::Host, int >, - TNL::Matrices::Ellpack< long, TNL::Devices::Host, int >, - TNL::Matrices::Ellpack< float, TNL::Devices::Host, int >, - TNL::Matrices::Ellpack< double, TNL::Devices::Host, int >, - TNL::Matrices::Ellpack< int, TNL::Devices::Host, long >, - TNL::Matrices::Ellpack< long, TNL::Devices::Host, long >, - TNL::Matrices::Ellpack< float, TNL::Devices::Host, long >, - TNL::Matrices::Ellpack< double, TNL::Devices::Host, long > + TNL::Matrices::SparseMatrix< int, TNL::Devices::Host, int, TNL::Matrices::GeneralMatrix, RowMajorEllpack >, + TNL::Matrices::SparseMatrix< long, TNL::Devices::Host, int, TNL::Matrices::GeneralMatrix, RowMajorEllpack >, + TNL::Matrices::SparseMatrix< float, TNL::Devices::Host, int, TNL::Matrices::GeneralMatrix, RowMajorEllpack >, + TNL::Matrices::SparseMatrix< double, TNL::Devices::Host, int, TNL::Matrices::GeneralMatrix, RowMajorEllpack >, + TNL::Matrices::SparseMatrix< int, TNL::Devices::Host, long, TNL::Matrices::GeneralMatrix, RowMajorEllpack >, + TNL::Matrices::SparseMatrix< long, TNL::Devices::Host, long, TNL::Matrices::GeneralMatrix, RowMajorEllpack >, + TNL::Matrices::SparseMatrix< float, TNL::Devices::Host, long, TNL::Matrices::GeneralMatrix, RowMajorEllpack >, + TNL::Matrices::SparseMatrix< double, TNL::Devices::Host, long, TNL::Matrices::GeneralMatrix, RowMajorEllpack > #ifdef HAVE_CUDA - ,TNL::Matrices::Ellpack< int, TNL::Devices::Cuda, short >, - TNL::Matrices::Ellpack< 
long, TNL::Devices::Cuda, short >, - TNL::Matrices::Ellpack< float, TNL::Devices::Cuda, short >, - TNL::Matrices::Ellpack< double, TNL::Devices::Cuda, short >, - TNL::Matrices::Ellpack< int, TNL::Devices::Cuda, int >, - TNL::Matrices::Ellpack< long, TNL::Devices::Cuda, int >, - TNL::Matrices::Ellpack< float, TNL::Devices::Cuda, int >, - TNL::Matrices::Ellpack< double, TNL::Devices::Cuda, int >, - TNL::Matrices::Ellpack< int, TNL::Devices::Cuda, long >, - TNL::Matrices::Ellpack< long, TNL::Devices::Cuda, long >, - TNL::Matrices::Ellpack< float, TNL::Devices::Cuda, long >, - TNL::Matrices::Ellpack< double, TNL::Devices::Cuda, long > + ,TNL::Matrices::SparseMatrix< int, TNL::Devices::Cuda, int, TNL::Matrices::GeneralMatrix, ColumnMajorEllpack >, + TNL::Matrices::SparseMatrix< long, TNL::Devices::Cuda, int, TNL::Matrices::GeneralMatrix, ColumnMajorEllpack >, + TNL::Matrices::SparseMatrix< float, TNL::Devices::Cuda, int, TNL::Matrices::GeneralMatrix, ColumnMajorEllpack >, + TNL::Matrices::SparseMatrix< double, TNL::Devices::Cuda, int, TNL::Matrices::GeneralMatrix, ColumnMajorEllpack >, + TNL::Matrices::SparseMatrix< int, TNL::Devices::Cuda, long, TNL::Matrices::GeneralMatrix, ColumnMajorEllpack >, + TNL::Matrices::SparseMatrix< long, TNL::Devices::Cuda, long, TNL::Matrices::GeneralMatrix, ColumnMajorEllpack >, + TNL::Matrices::SparseMatrix< float, TNL::Devices::Cuda, long, TNL::Matrices::GeneralMatrix, ColumnMajorEllpack >, + TNL::Matrices::SparseMatrix< double, TNL::Devices::Cuda, long, TNL::Matrices::GeneralMatrix, ColumnMajorEllpack > #endif >; -TYPED_TEST_SUITE( EllpackMatrixTest, EllpackMatrixTypes ); +TYPED_TEST_SUITE( EllpackMatrixTest, EllpackMatrixTypes); + +TYPED_TEST( EllpackMatrixTest, Constructors ) +{ + using EllpackMatrixType = typename TestFixture::EllpackMatrixType; + + test_Constructors< EllpackMatrixType >(); +} TYPED_TEST( EllpackMatrixTest, setDimensionsTest ) { @@ -92,6 +104,13 @@ TYPED_TEST( EllpackMatrixTest, resetTest ) test_Reset< EllpackMatrixType >(); } +TYPED_TEST( EllpackMatrixTest, getRowTest ) +{ + using EllpackMatrixType = typename TestFixture::EllpackMatrixType; + + test_GetRow< EllpackMatrixType >(); +} + TYPED_TEST( EllpackMatrixTest, setElementTest ) { using EllpackMatrixType = typename TestFixture::EllpackMatrixType; @@ -106,25 +125,25 @@ TYPED_TEST( EllpackMatrixTest, addElementTest ) test_AddElement< EllpackMatrixType >(); } -TYPED_TEST( EllpackMatrixTest, setRowTest ) +TYPED_TEST( EllpackMatrixTest, vectorProductTest ) { using EllpackMatrixType = typename TestFixture::EllpackMatrixType; - test_SetRow< EllpackMatrixType >(); + test_VectorProduct< EllpackMatrixType >(); } -TYPED_TEST( EllpackMatrixTest, vectorProductTest ) +TYPED_TEST( EllpackMatrixTest, rowsReduction ) { using EllpackMatrixType = typename TestFixture::EllpackMatrixType; - test_VectorProduct< EllpackMatrixType >(); + test_RowsReduction< EllpackMatrixType >(); } TYPED_TEST( EllpackMatrixTest, saveAndLoadTest ) { using EllpackMatrixType = typename TestFixture::EllpackMatrixType; - test_SaveAndLoad< EllpackMatrixType >( "test_SparseMatrixTest_Ellpack" ); + test_SaveAndLoad< EllpackMatrixType >( "test_SparseMatrixTest_Ellpack_segments" ); } TYPED_TEST( EllpackMatrixTest, printTest ) diff --git a/src/UnitTests/Matrices/SparseMatrixTest_SlicedEllpack.cpp b/src/UnitTests/Matrices/SparseMatrixTest_SlicedEllpack.cpp index 40e2e94b81ca64051ddceee82f46dd2d20e66e42..2c79ee5027bc9dcbcaad4ddab932976d1eb0c790 100644 --- a/src/UnitTests/Matrices/SparseMatrixTest_SlicedEllpack.cpp +++ 
b/src/UnitTests/Matrices/SparseMatrixTest_SlicedEllpack.cpp @@ -1 +1,11 @@ +/*************************************************************************** + SparseMatrixTest_SlicedEllpack.cpp - description + ------------------- + begin : Dec 3, 2019 + copyright : (C) 2019 by Tomas Oberhuber et al. + email : tomas.oberhuber@fjfi.cvut.cz + ***************************************************************************/ + +/* See Copyright Notice in tnl/Copyright */ + #include "SparseMatrixTest_SlicedEllpack.h" diff --git a/src/UnitTests/Matrices/SparseMatrixTest_SlicedEllpack.cu b/src/UnitTests/Matrices/SparseMatrixTest_SlicedEllpack.cu index 40e2e94b81ca64051ddceee82f46dd2d20e66e42..bff81d9a3e008385dd13923f9e9aca7281611e55 100644 --- a/src/UnitTests/Matrices/SparseMatrixTest_SlicedEllpack.cu +++ b/src/UnitTests/Matrices/SparseMatrixTest_SlicedEllpack.cu @@ -1 +1,11 @@ +/*************************************************************************** + SparseMatrixTest_SlicedEllpack.cu - description + ------------------- + begin : Dec 3, 2019 + copyright : (C) 2019 by Tomas Oberhuber et al. + email : tomas.oberhuber@fjfi.cvut.cz + ***************************************************************************/ + +/* See Copyright Notice in tnl/Copyright */ + #include "SparseMatrixTest_SlicedEllpack.h" diff --git a/src/UnitTests/Matrices/SparseMatrixTest_SlicedEllpack.h b/src/UnitTests/Matrices/SparseMatrixTest_SlicedEllpack.h index 0798f59dc49fbb5ada03d975fe60a61ae3e85fcc..17b48dcf461e4b8e99a9d1d9172ded8301b20038 100644 --- a/src/UnitTests/Matrices/SparseMatrixTest_SlicedEllpack.h +++ b/src/UnitTests/Matrices/SparseMatrixTest_SlicedEllpack.h @@ -1,16 +1,19 @@ /*************************************************************************** SparseMatrixTest_SlicedEllpack.h - description ------------------- - begin : Nov 2, 2018 - copyright : (C) 2018 by Tomas Oberhuber et al. + begin : Dec 3, 2019 + copyright : (C) 2019 by Tomas Oberhuber et al. 
email : tomas.oberhuber@fjfi.cvut.cz ***************************************************************************/ /* See Copyright Notice in tnl/Copyright */ -#include <TNL/Matrices/SlicedEllpack.h> +#include <TNL/Containers/Segments/SlicedEllpack.h> +#include <TNL/Matrices/SparseMatrix.h> +#include <TNL/Matrices/MatrixType.h> -#include "SparseMatrixTest.hpp" + +#include "SparseMatrixTest.h" #include <iostream> #ifdef HAVE_GTEST @@ -24,38 +27,48 @@ protected: using SlicedEllpackMatrixType = Matrix; }; +//// +// Row-major format is used for the host system +template< typename Device, typename Index, typename IndexAllocator > +using RowMajorSlicedEllpack = TNL::Containers::Segments::SlicedEllpack< Device, Index, IndexAllocator, true, 32 >; + + +//// +// Column-major format is used for GPUs +template< typename Device, typename Index, typename IndexAllocator > +using ColumnMajorSlicedEllpack = TNL::Containers::Segments::SlicedEllpack< Device, Index, IndexAllocator, false, 32 >; + // types for which MatrixTest is instantiated using SlicedEllpackMatrixTypes = ::testing::Types < - TNL::Matrices::SlicedEllpack< int, TNL::Devices::Host, short >, - TNL::Matrices::SlicedEllpack< long, TNL::Devices::Host, short >, - TNL::Matrices::SlicedEllpack< float, TNL::Devices::Host, short >, - TNL::Matrices::SlicedEllpack< double, TNL::Devices::Host, short >, - TNL::Matrices::SlicedEllpack< int, TNL::Devices::Host, int >, - TNL::Matrices::SlicedEllpack< long, TNL::Devices::Host, int >, - TNL::Matrices::SlicedEllpack< float, TNL::Devices::Host, int >, - TNL::Matrices::SlicedEllpack< double, TNL::Devices::Host, int >, - TNL::Matrices::SlicedEllpack< int, TNL::Devices::Host, long >, - TNL::Matrices::SlicedEllpack< long, TNL::Devices::Host, long >, - TNL::Matrices::SlicedEllpack< float, TNL::Devices::Host, long >, - TNL::Matrices::SlicedEllpack< double, TNL::Devices::Host, long > + TNL::Matrices::SparseMatrix< int, TNL::Devices::Host, int, TNL::Matrices::GeneralMatrix, RowMajorSlicedEllpack >, + TNL::Matrices::SparseMatrix< long, TNL::Devices::Host, int, TNL::Matrices::GeneralMatrix, RowMajorSlicedEllpack >, + TNL::Matrices::SparseMatrix< float, TNL::Devices::Host, int, TNL::Matrices::GeneralMatrix, RowMajorSlicedEllpack >, + TNL::Matrices::SparseMatrix< double, TNL::Devices::Host, int, TNL::Matrices::GeneralMatrix, RowMajorSlicedEllpack >, + TNL::Matrices::SparseMatrix< int, TNL::Devices::Host, long, TNL::Matrices::GeneralMatrix, RowMajorSlicedEllpack >, + TNL::Matrices::SparseMatrix< long, TNL::Devices::Host, long, TNL::Matrices::GeneralMatrix, RowMajorSlicedEllpack >, + TNL::Matrices::SparseMatrix< float, TNL::Devices::Host, long, TNL::Matrices::GeneralMatrix, RowMajorSlicedEllpack >, + TNL::Matrices::SparseMatrix< double, TNL::Devices::Host, long, TNL::Matrices::GeneralMatrix, RowMajorSlicedEllpack > #ifdef HAVE_CUDA - ,TNL::Matrices::SlicedEllpack< int, TNL::Devices::Cuda, short >, - TNL::Matrices::SlicedEllpack< long, TNL::Devices::Cuda, short >, - TNL::Matrices::SlicedEllpack< float, TNL::Devices::Cuda, short >, - TNL::Matrices::SlicedEllpack< double, TNL::Devices::Cuda, short >, - TNL::Matrices::SlicedEllpack< int, TNL::Devices::Cuda, int >, - TNL::Matrices::SlicedEllpack< long, TNL::Devices::Cuda, int >, - TNL::Matrices::SlicedEllpack< float, TNL::Devices::Cuda, int >, - TNL::Matrices::SlicedEllpack< double, TNL::Devices::Cuda, int >, - TNL::Matrices::SlicedEllpack< int, TNL::Devices::Cuda, long >, - TNL::Matrices::SlicedEllpack< long, TNL::Devices::Cuda, long >, - TNL::Matrices::SlicedEllpack< float, 
TNL::Devices::Cuda, long >, - TNL::Matrices::SlicedEllpack< double, TNL::Devices::Cuda, long > + ,TNL::Matrices::SparseMatrix< int, TNL::Devices::Cuda, int, TNL::Matrices::GeneralMatrix, ColumnMajorSlicedEllpack >, + TNL::Matrices::SparseMatrix< long, TNL::Devices::Cuda, int, TNL::Matrices::GeneralMatrix, ColumnMajorSlicedEllpack >, + TNL::Matrices::SparseMatrix< float, TNL::Devices::Cuda, int, TNL::Matrices::GeneralMatrix, ColumnMajorSlicedEllpack >, + TNL::Matrices::SparseMatrix< double, TNL::Devices::Cuda, int, TNL::Matrices::GeneralMatrix, ColumnMajorSlicedEllpack >, + TNL::Matrices::SparseMatrix< int, TNL::Devices::Cuda, long, TNL::Matrices::GeneralMatrix, ColumnMajorSlicedEllpack >, + TNL::Matrices::SparseMatrix< long, TNL::Devices::Cuda, long, TNL::Matrices::GeneralMatrix, ColumnMajorSlicedEllpack >, + TNL::Matrices::SparseMatrix< float, TNL::Devices::Cuda, long, TNL::Matrices::GeneralMatrix, ColumnMajorSlicedEllpack >, + TNL::Matrices::SparseMatrix< double, TNL::Devices::Cuda, long, TNL::Matrices::GeneralMatrix, ColumnMajorSlicedEllpack > #endif >; -TYPED_TEST_SUITE( SlicedEllpackMatrixTest, SlicedEllpackMatrixTypes ); +TYPED_TEST_SUITE( SlicedEllpackMatrixTest, SlicedEllpackMatrixTypes); + +TYPED_TEST( SlicedEllpackMatrixTest, Constructors ) +{ + using SlicedEllpackMatrixType = typename TestFixture::SlicedEllpackMatrixType; + + test_Constructors< SlicedEllpackMatrixType >(); +} TYPED_TEST( SlicedEllpackMatrixTest, setDimensionsTest ) { @@ -64,19 +77,12 @@ TYPED_TEST( SlicedEllpackMatrixTest, setDimensionsTest ) test_SetDimensions< SlicedEllpackMatrixType >(); } -//TYPED_TEST( SlicedEllpackMatrixTest, setCompressedRowLengthsTest ) -//{ -//// using SlicedEllpackMatrixType = typename TestFixture::SlicedEllpackMatrixType; -// -//// test_SetCompressedRowLengths< SlicedEllpackMatrixType >(); -// -// bool testRan = false; -// EXPECT_TRUE( testRan ); -// std::cout << "\nTEST DID NOT RUN. NOT WORKING.\n\n"; -// std::cout << " This test is dependent on the input format. 
\n"; -// std::cout << " Almost every format allocates elements per row differently.\n\n"; -// std::cout << "\n TODO: Finish implementation of getNonZeroRowLength (Only non-zero elements, not the number of allocated elements.)\n\n"; -//} +TYPED_TEST( SlicedEllpackMatrixTest, setCompressedRowLengthsTest ) +{ + using SlicedEllpackMatrixType = typename TestFixture::SlicedEllpackMatrixType; + + test_SetCompressedRowLengths< SlicedEllpackMatrixType >(); +} TYPED_TEST( SlicedEllpackMatrixTest, setLikeTest ) { @@ -92,6 +98,13 @@ TYPED_TEST( SlicedEllpackMatrixTest, resetTest ) test_Reset< SlicedEllpackMatrixType >(); } +TYPED_TEST( SlicedEllpackMatrixTest, getRowTest ) +{ + using SlicedEllpackMatrixType = typename TestFixture::SlicedEllpackMatrixType; + + test_GetRow< SlicedEllpackMatrixType >(); +} + TYPED_TEST( SlicedEllpackMatrixTest, setElementTest ) { using SlicedEllpackMatrixType = typename TestFixture::SlicedEllpackMatrixType; @@ -106,25 +119,25 @@ TYPED_TEST( SlicedEllpackMatrixTest, addElementTest ) test_AddElement< SlicedEllpackMatrixType >(); } -TYPED_TEST( SlicedEllpackMatrixTest, setRowTest ) +TYPED_TEST( SlicedEllpackMatrixTest, vectorProductTest ) { using SlicedEllpackMatrixType = typename TestFixture::SlicedEllpackMatrixType; - test_SetRow< SlicedEllpackMatrixType >(); + test_VectorProduct< SlicedEllpackMatrixType >(); } -TYPED_TEST( SlicedEllpackMatrixTest, vectorProductTest ) +TYPED_TEST( SlicedEllpackMatrixTest, rowsReduction ) { using SlicedEllpackMatrixType = typename TestFixture::SlicedEllpackMatrixType; - test_VectorProduct< SlicedEllpackMatrixType >(); + test_RowsReduction< SlicedEllpackMatrixType >(); } TYPED_TEST( SlicedEllpackMatrixTest, saveAndLoadTest ) { using SlicedEllpackMatrixType = typename TestFixture::SlicedEllpackMatrixType; - test_SaveAndLoad< SlicedEllpackMatrixType >( "test_SparseMatrixTest_SlicedEllpack" ); + test_SaveAndLoad< SlicedEllpackMatrixType >( "test_SparseMatrixTest_SlicedEllpack_segments" ); } TYPED_TEST( SlicedEllpackMatrixTest, printTest ) diff --git a/src/UnitTests/Matrices/SymmetricSparseMatrixTest.h b/src/UnitTests/Matrices/SymmetricSparseMatrixTest.h new file mode 100644 index 0000000000000000000000000000000000000000..02fd8c585366f4da12d1218a28adca717dd2cdf2 --- /dev/null +++ b/src/UnitTests/Matrices/SymmetricSparseMatrixTest.h @@ -0,0 +1,112 @@ +/*************************************************************************** + SymmetricSparseMatrixTest.h - description + ------------------- + begin : Feb 11, 2020 + copyright : (C) 2020 by Tomas Oberhuber et al. 
+ email : tomas.oberhuber@fjfi.cvut.cz + ***************************************************************************/ + +/* See Copyright Notice in tnl/Copyright */ + +#ifdef HAVE_GTEST +#include <gtest/gtest.h> +#include <iostream> +#include <TNL/Matrices/SparseMatrix.h> +#include "SymmetricSparseMatrixTest.hpp" + +// test fixture for typed tests +template< typename Matrix > +class MatrixTest : public ::testing::Test +{ +protected: + using MatrixType = Matrix; +}; + +TYPED_TEST_SUITE( MatrixTest, MatrixTypes); + +TYPED_TEST( MatrixTest, setDimensionsTest ) +{ + using MatrixType = typename TestFixture::MatrixType; + + test_SetDimensions< MatrixType >(); +} + +TYPED_TEST( MatrixTest, setCompressedRowLengthsTest ) +{ + using MatrixType = typename TestFixture::MatrixType; + + test_SetCompressedRowLengths< MatrixType >(); +} + +TYPED_TEST( MatrixTest, setLikeTest ) +{ + using MatrixType = typename TestFixture::MatrixType; + + test_SetLike< MatrixType, MatrixType >(); +} + +TYPED_TEST( MatrixTest, getNumberOfNonzeroMatrixElements ) +{ + using MatrixType = typename TestFixture::MatrixType; + + test_GetNumberOfNonzeroMatrixElements< MatrixType >(); +} + +TYPED_TEST( MatrixTest, resetTest ) +{ + using MatrixType = typename TestFixture::MatrixType; + + test_Reset< MatrixType >(); +} + +TYPED_TEST( MatrixTest, getRowTest ) +{ + using MatrixType = typename TestFixture::MatrixType; + + test_GetRow< MatrixType >(); +} + + +TYPED_TEST( MatrixTest, setElementTest ) +{ + using MatrixType = typename TestFixture::MatrixType; + + test_SetElement< MatrixType >(); +} + +TYPED_TEST( MatrixTest, addElementTest ) +{ + using MatrixType = typename TestFixture::MatrixType; + + test_AddElement< MatrixType >(); +} + +TYPED_TEST( MatrixTest, vectorProductTest ) +{ + using MatrixType = typename TestFixture::MatrixType; + + test_VectorProduct< MatrixType >(); +} + +TYPED_TEST( MatrixTest, rowsReduction ) +{ + using MatrixType = typename TestFixture::MatrixType; + + test_RowsReduction< MatrixType >(); +} + +TYPED_TEST( MatrixTest, saveAndLoadTest ) +{ + using MatrixType = typename TestFixture::MatrixType; + + test_SaveAndLoad< MatrixType >( saveAndLoadTestFileName ); +} + +TYPED_TEST( MatrixTest, printTest ) +{ + using MatrixType = typename TestFixture::MatrixType; + + test_Print< MatrixType >(); +} + +#endif diff --git a/src/UnitTests/Matrices/SymmetricSparseMatrixTest.hpp b/src/UnitTests/Matrices/SymmetricSparseMatrixTest.hpp new file mode 100644 index 0000000000000000000000000000000000000000..58a4f4fce0e4fdafcaf2c095e6d35875c51d286b --- /dev/null +++ b/src/UnitTests/Matrices/SymmetricSparseMatrixTest.hpp @@ -0,0 +1,1153 @@ +/*************************************************************************** + SymmetricSparseMatrixTest.h - description + ------------------- + begin : Feb 7, 2020 + copyright : (C) 2020 by Tomas Oberhuber et al. + email : tomas.oberhuber@fjfi.cvut.cz + ***************************************************************************/ + +/* See Copyright Notice in tnl/Copyright */ + +#include <TNL/Containers/Vector.h> +#include <TNL/Containers/VectorView.h> +#include <TNL/Math.h> +#include <TNL/Algorithms/ParallelFor.h> +#include <TNL/Algorithms/AtomicOperations.h> +#include <iostream> +#include <sstream> + +#ifdef HAVE_GTEST +#include <gtest/gtest.h> + +template< typename MatrixHostFloat, typename MatrixHostInt > +void host_test_GetType() +{ + bool testRan = false; + EXPECT_TRUE( testRan ); + std::cout << "\nTEST DID NOT RUN. 
NOT WORKING.\n\n"; + std::cerr << "This test has not been implemented properly yet.\n" << std::endl; +} + +template< typename MatrixCudaFloat, typename MatrixCudaInt > +void cuda_test_GetType() +{ + bool testRan = false; + EXPECT_TRUE( testRan ); + std::cout << "\nTEST DID NOT RUN. NOT WORKING.\n\n"; + std::cerr << "This test has not been implemented properly yet.\n" << std::endl; +} + +template< typename Matrix > +void test_SetDimensions() +{ + using RealType = typename Matrix::RealType; + using DeviceType = typename Matrix::DeviceType; + using IndexType = typename Matrix::IndexType; + + const IndexType rows = 9; + const IndexType cols = 8; + + Matrix m; + m.setDimensions( rows, cols ); + + EXPECT_EQ( m.getRows(), 9 ); + EXPECT_EQ( m.getColumns(), 8 ); +} + +template< typename Matrix > +void test_SetCompressedRowLengths() +{ + using RealType = typename Matrix::RealType; + using DeviceType = typename Matrix::DeviceType; + using IndexType = typename Matrix::IndexType; + + /* + / 1 2 4 7 \ + | 2 3 5 8 10 13 16 19 | + | 4 5 6 11 14 22 25 28 | + | 7 8 9 17 20 | + | 10 11 12 23 26 | + | 13 14 15 29 | + | 16 17 18 | + | 19 20 21 | + | 22 23 24 | + | 25 26 27 | + \ 28 29 30 / + */ + const IndexType rows = 11; + const IndexType cols = 11; + + Matrix m( rows, cols ); + typename Matrix::CompressedRowLengthsVector rowLengths { 1, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3 }; + m.setCompressedRowLengths( rowLengths ); + + // Insert values into the rows. + RealType value = 1; + + // 0th row - lower part + m.setElement( 0, 0, value++ ); + + // 1st row - lower part + m.setElement( 1, 0, value++ ); + m.setElement( 1, 1, value++ ); + + // 2nd row - lower part + m.setElement( 2, 0, value++ ); + m.setElement( 2, 1, value++ ); + m.setElement( 2, 2, value++ ); + + // 3rd row - lower part + m.setElement( 3, 0, value++ ); + m.setElement( 3, 1, value++ ); + m.setElement( 3, 3, value++ ); + + // 4th row - lower part + m.setElement( 4, 1, value++ ); + m.setElement( 4, 2, value++ ); + m.setElement( 4, 4, value++ ); + + // 5th row - lower part + m.setElement( 5, 1, value++ ); + m.setElement( 5, 2, value++ ); + m.setElement( 5, 5, value++ ); + + // 6th row - lower part + m.setElement( 6, 1, value++ ); + m.setElement( 6, 3, value++ ); + m.setElement( 6, 6, value++ ); + + // 7th row - lower part + m.setElement( 7, 1, value++ ); + m.setElement( 7, 3, value++ ); + m.setElement( 7, 7, value++ ); + + // 8th row - lower part + m.setElement( 8, 2, value++ ); + m.setElement( 8, 4, value++ ); + m.setElement( 8, 8, value++ ); + + // 9th row - lower part + m.setElement( 9, 2, value++ ); + m.setElement( 9, 4, value++ ); + m.setElement( 9, 9, value++ ); + + // 10th row - lower part + m.setElement( 10, 2, value++ ); + m.setElement( 10, 5, value++ ); + m.setElement( 10, 10, value++ ); + + rowLengths = 0; + m.getCompressedRowLengths( rowLengths ); + + typename Matrix::CompressedRowLengthsVector correctRowLengths{ 1, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3 }; + EXPECT_EQ( rowLengths, correctRowLengths ); +} + +template< typename Matrix1, typename Matrix2 > +void test_SetLike() +{ + using RealType = typename Matrix1::RealType; + using DeviceType = typename Matrix1::DeviceType; + using IndexType = typename Matrix1::IndexType; + + const IndexType rows = 8; + const IndexType cols = 7; + + Matrix1 m1( rows + 1, cols + 2 ); + Matrix2 m2( rows, cols ); + + m1.setLike( m2 ); + + EXPECT_EQ( m1.getRows(), m2.getRows() ); + EXPECT_EQ( m1.getColumns(), m2.getColumns() ); +} + +template< typename Matrix > +void test_GetNumberOfNonzeroMatrixElements() +{ + using RealType = 
typename Matrix::RealType; + using DeviceType = typename Matrix::DeviceType; + using IndexType = typename Matrix::IndexType; + + /* + * Sets up the following 10x10 sparse matrix: + * + / 1 2 4 7 \ -> 4 + | 2 3 5 8 10 13 16 19 | -> 8 + | 4 5 6 11 14 21 25 28 | -> 8 + | 7 8 9 17 20 | -> 5 + | 10 11 12 23 26 | -> 5 + | 13 14 15 29 | -> 4 + | 16 17 18 | -> 3 + | 19 20 21 | -> 3 + | 22 23 24 | -> 3 + | 25 26 27 | -> 3 + \ 28 29 30 / -> 3 + ---- + 49 + */ + + const IndexType rows = 11; + const IndexType cols = 11; + + Matrix m( rows, cols, { + { 0, 0, 1 }, + { 1, 0, 2 }, { 1, 1, 3 }, + { 2, 0, 4 }, { 2, 1, 5 }, { 2, 2, 6 }, + { 3, 0, 7 }, { 3, 1, 8 }, { 3, 3, 9 }, + { 4, 1, 10 }, { 4, 2, 11 }, { 4, 4, 12 }, + { 5, 1, 13 }, { 5, 2, 14 }, { 5, 5, 15 }, + { 6, 1, 16 }, { 6, 3, 17 }, { 6, 6, 18 }, + { 7, 1, 19 }, { 7, 3, 20 }, { 7, 7, 21 }, + { 8, 2, 22 }, { 8, 4, 23 }, { 8, 8, 24 }, + { 9, 2, 25 }, { 9, 4, 26 }, { 9, 9, 27 }, + { 10, 2, 28 }, { 10, 4, 29 }, { 10, 10, 30 } + } ); + + EXPECT_EQ( m.getNumberOfNonzeroMatrixElements(), 49 ); +} + +template< typename Matrix > +void test_Reset() +{ + using RealType = typename Matrix::RealType; + using DeviceType = typename Matrix::DeviceType; + using IndexType = typename Matrix::IndexType; + + /* + * Sets up the following 5x4 sparse matrix: + * + * / 0 0 0 0 \ + * | 0 0 0 0 | + * | 0 0 0 0 | + * | 0 0 0 0 | + * \ 0 0 0 0 / + */ + + const IndexType rows = 5; + const IndexType cols = 4; + + Matrix m( rows, cols ); + m.reset(); + + EXPECT_EQ( m.getRows(), 0 ); + EXPECT_EQ( m.getColumns(), 0 ); +} + +template< typename Matrix > +void test_GetRow() +{ + using RealType = typename Matrix::RealType; + using DeviceType = typename Matrix::DeviceType; + using IndexType = typename Matrix::IndexType; + /* + * Sets up the following 11x11 sparse matrix: + * + / 1 2 4 7 \ + | 2 3 5 8 10 13 16 19 | + | 4 5 6 11 14 22 25 28 | + | 7 8 9 17 20 | + | 10 11 12 23 26 | + | 13 14 15 29 | + | 16 17 18 | + | 19 20 21 | + | 22 23 24 | + | 25 26 27 | + \ 28 29 30 / + */ + + Matrix m( { 1, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3 }, 11 ); + + auto matrixView = m.getView(); + auto f = [=] __cuda_callable__ ( const IndexType rowIdx ) mutable { + auto row = matrixView.getRow( rowIdx ); + RealType val; + switch( rowIdx ) + { + case 0: row.setElement( 0, 0, 1 ); break; + case 1: row.setElement( 0, 0, 2 ); row.setElement( 1, 1, 3 ); break; + case 2: row.setElement( 0, 0, 4 ); row.setElement( 1, 1, 5 ); row.setElement( 2, 2, 6 ); break; + case 3: row.setElement( 0, 0, 7 ); row.setElement( 1, 1, 8 ); row.setElement( 2, 3, 9 ); break; + case 4: row.setElement( 0, 1, 10 ); row.setElement( 1, 2, 11 ); row.setElement( 2, 4, 12 ); break; + case 5: row.setElement( 0, 1, 13 ); row.setElement( 1, 2, 14 ); row.setElement( 2, 5, 15 ); break; + case 6: row.setElement( 0, 1, 16 ); row.setElement( 1, 3, 17 ); row.setElement( 2, 6, 18 ); break; + case 7: row.setElement( 0, 1, 19 ); row.setElement( 1, 3, 20 ); row.setElement( 2, 7, 21 ); break; + case 8: row.setElement( 0, 2, 22 ); row.setElement( 1, 4, 23 ); row.setElement( 2, 8, 24 ); break; + case 9: row.setElement( 0, 2, 25 ); row.setElement( 1, 4, 26 ); row.setElement( 2, 9, 27 ); break; + case 10: row.setElement( 0, 2, 28 ); row.setElement( 1, 5, 29 ); row.setElement( 2, 10, 30 ); break; + } + }; + TNL::Algorithms::ParallelFor< DeviceType >::exec( ( IndexType ) 0, m.getRows(), f ); + + EXPECT_EQ( m.getElement( 0, 0 ), 1 ); + EXPECT_EQ( m.getElement( 0, 1 ), 2 ); + EXPECT_EQ( m.getElement( 0, 2 ), 4 ); + EXPECT_EQ( m.getElement( 0, 3 ), 7 ); + EXPECT_EQ( 
m.getElement( 0, 4 ), 0 ); + EXPECT_EQ( m.getElement( 0, 5 ), 0 ); + EXPECT_EQ( m.getElement( 0, 6 ), 0 ); + EXPECT_EQ( m.getElement( 0, 7 ), 0 ); + EXPECT_EQ( m.getElement( 0, 8 ), 0 ); + EXPECT_EQ( m.getElement( 0, 9 ), 0 ); + EXPECT_EQ( m.getElement( 0, 10 ), 0 ); + + EXPECT_EQ( m.getElement( 1, 0 ), 2 ); + EXPECT_EQ( m.getElement( 1, 1 ), 3 ); + EXPECT_EQ( m.getElement( 1, 2 ), 5 ); + EXPECT_EQ( m.getElement( 1, 3 ), 8 ); + EXPECT_EQ( m.getElement( 1, 4 ), 10 ); + EXPECT_EQ( m.getElement( 1, 5 ), 13 ); + EXPECT_EQ( m.getElement( 1, 6 ), 16 ); + EXPECT_EQ( m.getElement( 1, 7 ), 19 ); + EXPECT_EQ( m.getElement( 1, 8 ), 0 ); + EXPECT_EQ( m.getElement( 1, 9 ), 0 ); + EXPECT_EQ( m.getElement( 1, 10 ), 0 ); + + EXPECT_EQ( m.getElement( 2, 0 ), 4 ); + EXPECT_EQ( m.getElement( 2, 1 ), 5 ); + EXPECT_EQ( m.getElement( 2, 2 ), 6 ); + EXPECT_EQ( m.getElement( 2, 3 ), 0 ); + EXPECT_EQ( m.getElement( 2, 4 ), 11 ); + EXPECT_EQ( m.getElement( 2, 5 ), 14 ); + EXPECT_EQ( m.getElement( 2, 6 ), 0 ); + EXPECT_EQ( m.getElement( 2, 7 ), 0 ); + EXPECT_EQ( m.getElement( 2, 8 ), 22 ); + EXPECT_EQ( m.getElement( 2, 9 ), 25 ); + EXPECT_EQ( m.getElement( 2, 10 ), 28 ); + + EXPECT_EQ( m.getElement( 3, 0 ), 7 ); + EXPECT_EQ( m.getElement( 3, 1 ), 8 ); + EXPECT_EQ( m.getElement( 3, 2 ), 0 ); + EXPECT_EQ( m.getElement( 3, 3 ), 9 ); + EXPECT_EQ( m.getElement( 3, 4 ), 0 ); + EXPECT_EQ( m.getElement( 3, 5 ), 0 ); + EXPECT_EQ( m.getElement( 3, 6 ), 17 ); + EXPECT_EQ( m.getElement( 3, 7 ), 20 ); + EXPECT_EQ( m.getElement( 3, 8 ), 0 ); + EXPECT_EQ( m.getElement( 3, 9 ), 0 ); + EXPECT_EQ( m.getElement( 3, 10 ), 0 ); + + EXPECT_EQ( m.getElement( 4, 0 ), 0 ); + EXPECT_EQ( m.getElement( 4, 1 ), 10 ); + EXPECT_EQ( m.getElement( 4, 2 ), 11 ); + EXPECT_EQ( m.getElement( 4, 3 ), 0 ); + EXPECT_EQ( m.getElement( 4, 4 ), 12 ); + EXPECT_EQ( m.getElement( 4, 5 ), 0 ); + EXPECT_EQ( m.getElement( 4, 6 ), 0 ); + EXPECT_EQ( m.getElement( 4, 7 ), 0 ); + EXPECT_EQ( m.getElement( 4, 8 ), 23 ); + EXPECT_EQ( m.getElement( 4, 9 ), 26 ); + EXPECT_EQ( m.getElement( 4, 10 ), 0 ); + + EXPECT_EQ( m.getElement( 5, 0 ), 0 ); + EXPECT_EQ( m.getElement( 5, 1 ), 13 ); + EXPECT_EQ( m.getElement( 5, 2 ), 14 ); + EXPECT_EQ( m.getElement( 5, 3 ), 0 ); + EXPECT_EQ( m.getElement( 5, 4 ), 0 ); + EXPECT_EQ( m.getElement( 5, 5 ), 15 ); + EXPECT_EQ( m.getElement( 5, 6 ), 0 ); + EXPECT_EQ( m.getElement( 5, 7 ), 0 ); + EXPECT_EQ( m.getElement( 5, 8 ), 0 ); + EXPECT_EQ( m.getElement( 5, 9 ), 0 ); + EXPECT_EQ( m.getElement( 5, 10 ), 29 ); + + EXPECT_EQ( m.getElement( 6, 0 ), 0 ); + EXPECT_EQ( m.getElement( 6, 1 ), 16 ); + EXPECT_EQ( m.getElement( 6, 2 ), 0 ); + EXPECT_EQ( m.getElement( 6, 3 ), 17 ); + EXPECT_EQ( m.getElement( 6, 4 ), 0 ); + EXPECT_EQ( m.getElement( 6, 5 ), 0 ); + EXPECT_EQ( m.getElement( 6, 6 ), 18 ); + EXPECT_EQ( m.getElement( 6, 7 ), 0 ); + EXPECT_EQ( m.getElement( 6, 8 ), 0 ); + EXPECT_EQ( m.getElement( 6, 9 ), 0 ); + EXPECT_EQ( m.getElement( 6, 10 ), 0 ); + + EXPECT_EQ( m.getElement( 7, 0 ), 0 ); + EXPECT_EQ( m.getElement( 7, 1 ), 19 ); + EXPECT_EQ( m.getElement( 7, 2 ), 0 ); + EXPECT_EQ( m.getElement( 7, 3 ), 20 ); + EXPECT_EQ( m.getElement( 7, 4 ), 0 ); + EXPECT_EQ( m.getElement( 7, 5 ), 0 ); + EXPECT_EQ( m.getElement( 7, 6 ), 0 ); + EXPECT_EQ( m.getElement( 7, 7 ), 21 ); + EXPECT_EQ( m.getElement( 7, 8 ), 0 ); + EXPECT_EQ( m.getElement( 7, 9 ), 0 ); + EXPECT_EQ( m.getElement( 7, 10 ), 0 ); + + EXPECT_EQ( m.getElement( 8, 0 ), 0 ); + EXPECT_EQ( m.getElement( 8, 1 ), 0 ); + EXPECT_EQ( m.getElement( 8, 2 ), 22 ); + EXPECT_EQ( m.getElement( 8, 3 ), 0 
); + EXPECT_EQ( m.getElement( 8, 4 ), 23 ); + EXPECT_EQ( m.getElement( 8, 5 ), 0 ); + EXPECT_EQ( m.getElement( 8, 6 ), 0 ); + EXPECT_EQ( m.getElement( 8, 7 ), 0 ); + EXPECT_EQ( m.getElement( 8, 8 ), 24 ); + EXPECT_EQ( m.getElement( 8, 9 ), 0 ); + EXPECT_EQ( m.getElement( 8, 10 ), 0 ); + + EXPECT_EQ( m.getElement( 9, 0 ), 0 ); + EXPECT_EQ( m.getElement( 9, 1 ), 0 ); + EXPECT_EQ( m.getElement( 9, 2 ), 25 ); + EXPECT_EQ( m.getElement( 9, 3 ), 0 ); + EXPECT_EQ( m.getElement( 9, 4 ), 26 ); + EXPECT_EQ( m.getElement( 9, 5 ), 0 ); + EXPECT_EQ( m.getElement( 9, 6 ), 0 ); + EXPECT_EQ( m.getElement( 9, 7 ), 0 ); + EXPECT_EQ( m.getElement( 9, 8 ), 0 ); + EXPECT_EQ( m.getElement( 9, 9 ), 27 ); + EXPECT_EQ( m.getElement( 9, 10 ), 0 ); + + EXPECT_EQ( m.getElement( 10, 0 ), 0 ); + EXPECT_EQ( m.getElement( 10, 1 ), 0 ); + EXPECT_EQ( m.getElement( 10, 2 ), 28 ); + EXPECT_EQ( m.getElement( 10, 3 ), 0 ); + EXPECT_EQ( m.getElement( 10, 4 ), 0 ); + EXPECT_EQ( m.getElement( 10, 5 ), 29 ); + EXPECT_EQ( m.getElement( 10, 6 ), 0 ); + EXPECT_EQ( m.getElement( 10, 7 ), 0 ); + EXPECT_EQ( m.getElement( 10, 8 ), 0 ); + EXPECT_EQ( m.getElement( 10, 9 ), 0 ); + EXPECT_EQ( m.getElement( 10, 10 ), 30 ); +} + + +template< typename Matrix > +void test_SetElement() +{ + using RealType = typename Matrix::RealType; + using DeviceType = typename Matrix::DeviceType; + using IndexType = typename Matrix::IndexType; + + /* + * Sets up the following 10x10 sparse matrix: + * + * / 1 0 0 4 0 0 10 0 0 0 \ + * | 0 2 0 5 0 0 11 0 0 0 | + * | 0 0 3 6 0 0 12 0 0 0 | + * | 4 5 6 7 0 0 13 0 0 0 | + * | 0 0 0 0 8 0 14 0 0 0 | + * | 0 0 0 0 0 9 15 0 0 0 | + * | 10 11 12 13 14 15 16 0 0 0 | + * | 0 0 0 0 0 0 0 17 0 0 | + * | 0 0 0 0 0 0 0 0 18 0 | + * \ 0 0 0 0 0 0 0 0 0 19 / + */ + + Matrix m( { 1, 1, 1, 4, 1, 1, 7, 1, 1, 1 }, 10 ); + + RealType value = 1; + for( IndexType i = 0; i < 3; i++ ) + m.setElement( i, i, value++ ); + + for( IndexType i = 0; i < 4; i++ ) + m.setElement( 3, i, value++ ); + + for( IndexType i = 4; i < 6; i++ ) + m.setElement( i, i, value++ ); + + for( IndexType i = 0; i < 7; i++ ) + m.setElement( 6, i, value++ ); + + for( IndexType i = 7; i < 10; i++ ) + m.setElement( i, i, value++ ); + + EXPECT_EQ( m.getElement( 0, 0 ), 1 ); + EXPECT_EQ( m.getElement( 0, 1 ), 0 ); + EXPECT_EQ( m.getElement( 0, 2 ), 0 ); + EXPECT_EQ( m.getElement( 0, 3 ), 4 ); + EXPECT_EQ( m.getElement( 0, 4 ), 0 ); + EXPECT_EQ( m.getElement( 0, 5 ), 0 ); + EXPECT_EQ( m.getElement( 0, 6 ), 10 ); + EXPECT_EQ( m.getElement( 0, 7 ), 0 ); + EXPECT_EQ( m.getElement( 0, 8 ), 0 ); + EXPECT_EQ( m.getElement( 0, 9 ), 0 ); + + EXPECT_EQ( m.getElement( 1, 0 ), 0 ); + EXPECT_EQ( m.getElement( 1, 1 ), 2 ); + EXPECT_EQ( m.getElement( 1, 2 ), 0 ); + EXPECT_EQ( m.getElement( 1, 3 ), 5 ); + EXPECT_EQ( m.getElement( 1, 4 ), 0 ); + EXPECT_EQ( m.getElement( 1, 5 ), 0 ); + EXPECT_EQ( m.getElement( 1, 6 ), 11 ); + EXPECT_EQ( m.getElement( 1, 7 ), 0 ); + EXPECT_EQ( m.getElement( 1, 8 ), 0 ); + EXPECT_EQ( m.getElement( 1, 9 ), 0 ); + + EXPECT_EQ( m.getElement( 2, 0 ), 0 ); + EXPECT_EQ( m.getElement( 2, 1 ), 0 ); + EXPECT_EQ( m.getElement( 2, 2 ), 3 ); + EXPECT_EQ( m.getElement( 2, 3 ), 6 ); + EXPECT_EQ( m.getElement( 2, 4 ), 0 ); + EXPECT_EQ( m.getElement( 2, 5 ), 0 ); + EXPECT_EQ( m.getElement( 2, 6 ), 12 ); + EXPECT_EQ( m.getElement( 2, 7 ), 0 ); + EXPECT_EQ( m.getElement( 2, 8 ), 0 ); + EXPECT_EQ( m.getElement( 2, 9 ), 0 ); + + EXPECT_EQ( m.getElement( 3, 0 ), 4 ); + EXPECT_EQ( m.getElement( 3, 1 ), 5 ); + EXPECT_EQ( m.getElement( 3, 2 ), 6 ); + EXPECT_EQ( m.getElement( 3, 3 
), 7 ); + EXPECT_EQ( m.getElement( 3, 4 ), 0 ); + EXPECT_EQ( m.getElement( 3, 5 ), 0 ); + EXPECT_EQ( m.getElement( 3, 6 ), 13 ); + EXPECT_EQ( m.getElement( 3, 7 ), 0 ); + EXPECT_EQ( m.getElement( 3, 8 ), 0 ); + EXPECT_EQ( m.getElement( 3, 9 ), 0 ); + + EXPECT_EQ( m.getElement( 4, 0 ), 0 ); + EXPECT_EQ( m.getElement( 4, 1 ), 0 ); + EXPECT_EQ( m.getElement( 4, 2 ), 0 ); + EXPECT_EQ( m.getElement( 4, 3 ), 0 ); + EXPECT_EQ( m.getElement( 4, 4 ), 8 ); + EXPECT_EQ( m.getElement( 4, 5 ), 0 ); + EXPECT_EQ( m.getElement( 4, 6 ), 14 ); + EXPECT_EQ( m.getElement( 4, 7 ), 0 ); + EXPECT_EQ( m.getElement( 4, 8 ), 0 ); + EXPECT_EQ( m.getElement( 4, 9 ), 0 ); + + EXPECT_EQ( m.getElement( 5, 0 ), 0 ); + EXPECT_EQ( m.getElement( 5, 1 ), 0 ); + EXPECT_EQ( m.getElement( 5, 2 ), 0 ); + EXPECT_EQ( m.getElement( 5, 3 ), 0 ); + EXPECT_EQ( m.getElement( 5, 4 ), 0 ); + EXPECT_EQ( m.getElement( 5, 5 ), 9 ); + EXPECT_EQ( m.getElement( 5, 6 ), 15 ); + EXPECT_EQ( m.getElement( 5, 7 ), 0 ); + EXPECT_EQ( m.getElement( 5, 8 ), 0 ); + EXPECT_EQ( m.getElement( 5, 9 ), 0 ); + + EXPECT_EQ( m.getElement( 6, 0 ), 10 ); + EXPECT_EQ( m.getElement( 6, 1 ), 11 ); + EXPECT_EQ( m.getElement( 6, 2 ), 12 ); + EXPECT_EQ( m.getElement( 6, 3 ), 13 ); + EXPECT_EQ( m.getElement( 6, 4 ), 14 ); + EXPECT_EQ( m.getElement( 6, 5 ), 15 ); + EXPECT_EQ( m.getElement( 6, 6 ), 16 ); + EXPECT_EQ( m.getElement( 6, 7 ), 0 ); + EXPECT_EQ( m.getElement( 6, 8 ), 0 ); + EXPECT_EQ( m.getElement( 6, 9 ), 0 ); + + EXPECT_EQ( m.getElement( 7, 0 ), 0 ); + EXPECT_EQ( m.getElement( 7, 1 ), 0 ); + EXPECT_EQ( m.getElement( 7, 2 ), 0 ); + EXPECT_EQ( m.getElement( 7, 3 ), 0 ); + EXPECT_EQ( m.getElement( 7, 4 ), 0 ); + EXPECT_EQ( m.getElement( 7, 5 ), 0 ); + EXPECT_EQ( m.getElement( 7, 6 ), 0 ); + EXPECT_EQ( m.getElement( 7, 7 ), 17 ); + EXPECT_EQ( m.getElement( 7, 8 ), 0 ); + EXPECT_EQ( m.getElement( 7, 9 ), 0 ); + + EXPECT_EQ( m.getElement( 8, 0 ), 0 ); + EXPECT_EQ( m.getElement( 8, 1 ), 0 ); + EXPECT_EQ( m.getElement( 8, 2 ), 0 ); + EXPECT_EQ( m.getElement( 8, 3 ), 0 ); + EXPECT_EQ( m.getElement( 8, 4 ), 0 ); + EXPECT_EQ( m.getElement( 8, 5 ), 0 ); + EXPECT_EQ( m.getElement( 8, 6 ), 0 ); + EXPECT_EQ( m.getElement( 8, 7 ), 0 ); + EXPECT_EQ( m.getElement( 8, 8 ), 18 ); + EXPECT_EQ( m.getElement( 8, 9 ), 0 ); + + EXPECT_EQ( m.getElement( 9, 0 ), 0 ); + EXPECT_EQ( m.getElement( 9, 1 ), 0 ); + EXPECT_EQ( m.getElement( 9, 2 ), 0 ); + EXPECT_EQ( m.getElement( 9, 3 ), 0 ); + EXPECT_EQ( m.getElement( 9, 4 ), 0 ); + EXPECT_EQ( m.getElement( 9, 5 ), 0 ); + EXPECT_EQ( m.getElement( 9, 6 ), 0 ); + EXPECT_EQ( m.getElement( 9, 7 ), 0 ); + EXPECT_EQ( m.getElement( 9, 8 ), 0 ); + EXPECT_EQ( m.getElement( 9, 9 ), 19 ); +} + +template< typename Matrix > +void test_AddElement() +{ + using RealType = typename Matrix::RealType; + using DeviceType = typename Matrix::DeviceType; + using IndexType = typename Matrix::IndexType; + + /* + * Sets up the following 6x5 sparse matrix: + * + * / 1 2 0 0 0 \ + * | 2 3 4 0 0 | + * | 0 4 5 6 0 | + * | 0 0 6 7 8 | + * | 0 0 0 8 9 | + * \ 0 0 0 0 10 / + */ + + const IndexType rows = 6; + const IndexType cols = 5; + + Matrix m( 6, 5, { + { 0, 0, 1 }, + { 1, 0, 2 }, { 1, 1, 3 }, + { 2, 1, 4 }, { 2, 2, 5 }, + { 3, 2, 6 }, { 3, 3, 7 }, + { 4, 3, 8 }, { 4, 4, 9 }, + { 5, 4, 10 } } ); + + // Check the set elements + EXPECT_EQ( m.getElement( 0, 0 ), 1 ); + EXPECT_EQ( m.getElement( 0, 1 ), 2 ); + EXPECT_EQ( m.getElement( 0, 2 ), 0 ); + EXPECT_EQ( m.getElement( 0, 3 ), 0 ); + EXPECT_EQ( m.getElement( 0, 4 ), 0 ); + + EXPECT_EQ( m.getElement( 1, 0 ), 2 ); + 
EXPECT_EQ( m.getElement( 1, 1 ), 3 ); + EXPECT_EQ( m.getElement( 1, 2 ), 4 ); + EXPECT_EQ( m.getElement( 1, 3 ), 0 ); + EXPECT_EQ( m.getElement( 1, 4 ), 0 ); + + EXPECT_EQ( m.getElement( 2, 0 ), 0 ); + EXPECT_EQ( m.getElement( 2, 1 ), 4 ); + EXPECT_EQ( m.getElement( 2, 2 ), 5 ); + EXPECT_EQ( m.getElement( 2, 3 ), 6 ); + EXPECT_EQ( m.getElement( 2, 4 ), 0 ); + + EXPECT_EQ( m.getElement( 3, 0 ), 0 ); + EXPECT_EQ( m.getElement( 3, 1 ), 0 ); + EXPECT_EQ( m.getElement( 3, 2 ), 6 ); + EXPECT_EQ( m.getElement( 3, 3 ), 7 ); + EXPECT_EQ( m.getElement( 3, 4 ), 8 ); + + EXPECT_EQ( m.getElement( 4, 0 ), 0 ); + EXPECT_EQ( m.getElement( 4, 1 ), 0 ); + EXPECT_EQ( m.getElement( 4, 2 ), 0 ); + EXPECT_EQ( m.getElement( 4, 3 ), 8 ); + EXPECT_EQ( m.getElement( 4, 4 ), 9 ); + + EXPECT_EQ( m.getElement( 5, 0 ), 0 ); + EXPECT_EQ( m.getElement( 5, 1 ), 0 ); + EXPECT_EQ( m.getElement( 5, 2 ), 0 ); + EXPECT_EQ( m.getElement( 5, 3 ), 0 ); + EXPECT_EQ( m.getElement( 5, 4 ), 10 ); + + // Add new elements to the old elements with a multiplying factor applied to the old elements. + /* + * The following setup results in the following 6x5 sparse matrix: + * + * / 1 2 0 0 0 \ / 0 1 0 0 0 \ / 2 5 0 0 0 \ + * | 2 3 4 0 0 | | 1 0 1 0 0 | | 5 6 9 0 0 | + * 2 | 0 4 5 6 0 | + | 0 1 0 1 0 | = | 0 9 10 13 0 | + * | 0 0 6 7 8 | | 0 0 1 0 1 | | 0 0 13 14 17 | + * | 0 0 0 8 9 | | 0 0 0 1 0 | | 0 0 0 17 18 | + * \ 0 0 0 0 10 / \ 0 0 0 0 1 / \ 0 0 0 0 21 / + */ + + for( IndexType i = 0; i < rows; i++ ) + { + if( i > 0 ) + m.addElement( i, i - 1, 1.0, 2.0 ); + if( i < cols ) + m.addElement( i, i, 0.0, 2.0 ); + } + + EXPECT_EQ( m.getElement( 0, 0 ), 2 ); + EXPECT_EQ( m.getElement( 0, 1 ), 5 ); + EXPECT_EQ( m.getElement( 0, 2 ), 0 ); + EXPECT_EQ( m.getElement( 0, 3 ), 0 ); + EXPECT_EQ( m.getElement( 0, 4 ), 0 ); + + EXPECT_EQ( m.getElement( 1, 0 ), 5 ); + EXPECT_EQ( m.getElement( 1, 1 ), 6 ); + EXPECT_EQ( m.getElement( 1, 2 ), 9 ); + EXPECT_EQ( m.getElement( 1, 3 ), 0 ); + EXPECT_EQ( m.getElement( 1, 4 ), 0 ); + + EXPECT_EQ( m.getElement( 2, 0 ), 0 ); + EXPECT_EQ( m.getElement( 2, 1 ), 9 ); + EXPECT_EQ( m.getElement( 2, 2 ), 10 ); + EXPECT_EQ( m.getElement( 2, 3 ), 13 ); + EXPECT_EQ( m.getElement( 2, 4 ), 0 ); + + EXPECT_EQ( m.getElement( 3, 0 ), 0 ); + EXPECT_EQ( m.getElement( 3, 1 ), 0 ); + EXPECT_EQ( m.getElement( 3, 2 ), 13 ); + EXPECT_EQ( m.getElement( 3, 3 ), 14 ); + EXPECT_EQ( m.getElement( 3, 4 ), 17 ); + + EXPECT_EQ( m.getElement( 4, 0 ), 0 ); + EXPECT_EQ( m.getElement( 4, 1 ), 0 ); + EXPECT_EQ( m.getElement( 4, 2 ), 0 ); + EXPECT_EQ( m.getElement( 4, 3 ), 17 ); + EXPECT_EQ( m.getElement( 4, 4 ), 18 ); + + EXPECT_EQ( m.getElement( 5, 0 ), 0 ); + EXPECT_EQ( m.getElement( 5, 1 ), 0 ); + EXPECT_EQ( m.getElement( 5, 2 ), 0 ); + EXPECT_EQ( m.getElement( 5, 3 ), 0 ); + EXPECT_EQ( m.getElement( 5, 4 ), 21 ); +} + +template< typename Matrix > +void test_VectorProduct() +{ + using RealType = typename Matrix::RealType; + using DeviceType = typename Matrix::DeviceType; + using IndexType = typename Matrix::IndexType; + using VectorType = TNL::Containers::Vector< RealType, DeviceType, IndexType >; + + /** + * Sets up the following 4x4 sparse matrix: + * + * / 1 0 0 0 \ + * | 0 2 3 4 | + * | 0 3 0 5 | + * \ 0 4 5 0 / + */ + + const IndexType m_rows_1 = 4; + const IndexType m_cols_1 = 4; + + Matrix m_1( m_rows_1, m_cols_1, { + { 0, 0, 1 }, + { 1, 1, 2 }, + { 2, 1, 3 }, + { 3, 1, 4 }, { 3, 2, 5 } } ); + + VectorType inVector_1( m_cols_1, 2.0 ); + VectorType outVector_1( m_rows_1, 0.0 ); + m_1.vectorProduct( inVector_1, outVector_1 ); + + 
EXPECT_EQ( outVector_1.getElement( 0 ), 2 ); + EXPECT_EQ( outVector_1.getElement( 1 ), 18 ); + EXPECT_EQ( outVector_1.getElement( 2 ), 16 ); + EXPECT_EQ( outVector_1.getElement( 3 ), 18 ); + + /** + * Sets up the following 4x4 sparse matrix: + * + * / 1 2 3 0 \ + * | 2 0 6 8 | + * | 3 6 7 0 | + * \ 0 8 0 9 / + */ + + const IndexType m_rows_2 = 4; + const IndexType m_cols_2 = 4; + + Matrix m_2( m_rows_2, m_cols_2, { + { 0, 0, 1 }, + { 1, 0, 2 }, + { 2, 0, 3 }, { 2, 1, 6 }, { 2, 2, 7 }, + { 3, 1, 8 }, { 3, 3, 9 } } ); + + VectorType inVector_2( m_cols_2, 2 ); + VectorType outVector_2( m_rows_2, 0 ); + m_2.vectorProduct( inVector_2, outVector_2 ); + + EXPECT_EQ( outVector_2.getElement( 0 ), 12 ); + EXPECT_EQ( outVector_2.getElement( 1 ), 32 ); + EXPECT_EQ( outVector_2.getElement( 2 ), 32 ); + EXPECT_EQ( outVector_2.getElement( 3 ), 34 ); + + /* + * Sets up the following 4x4 sparse matrix: + * + * / 1 2 3 4 \ + * | 2 5 0 0 | + * | 3 0 6 0 | + * \ 4 0 0 7 / + */ + + const IndexType m_rows_3 = 4; + const IndexType m_cols_3 = 4; + + Matrix m_3( m_rows_3, m_cols_3, { + { 0, 0, 1 }, { 0, 1, 2 }, { 0, 2, 3 }, { 0, 3, 4 }, + { 1, 0, 2 }, { 1, 1, 5 }, + { 2, 0, 3 }, { 2, 2, 6 }, + { 3, 0, 4 }, { 3, 3, 7 } + } ); + + VectorType inVector_3( { 0, 1, 2, 3 } ); + VectorType outVector_3( m_rows_3, 0 ); + m_3.vectorProduct( inVector_3, outVector_3 ); + + EXPECT_EQ( outVector_3.getElement( 0 ), 20 ); + EXPECT_EQ( outVector_3.getElement( 1 ), 5 ); + EXPECT_EQ( outVector_3.getElement( 2 ), 12 ); + EXPECT_EQ( outVector_3.getElement( 3 ), 21 ); + + /* + * Sets up the following 8x8 sparse matrix: + * + * / 1 0 3 0 9 0 15 0 \ + * | 0 2 0 6 0 12 0 19 | + * | 3 0 5 0 10 0 16 0 | + * | 0 6 0 8 0 13 0 20 | + * | 9 0 10 0 11 0 17 0 | + * | 0 12 0 13 0 14 0 21 | + * | 15 0 16 0 17 0 18 0 | + * \ 0 19 0 20 0 21 0 22 / + */ + + const IndexType m_rows_4 = 8; + const IndexType m_cols_4 = 8; + + Matrix m_4( m_rows_4, m_cols_4, { + { 0, 0, 1 }, + { 1, 1, 2 }, + { 2, 0, 3 }, { 2, 2, 5 }, + { 3, 1, 6 }, { 3, 3, 8 }, + { 4, 0, 9 }, { 4, 2, 10 }, { 4, 4, 11 }, + { 5, 1, 12 }, { 5, 3, 13 }, { 5, 5, 14 }, + { 6, 0, 15 }, { 6, 2, 16 }, { 6, 4, 17 }, { 6, 6, 18 }, + { 7, 1, 19 }, { 7, 3, 20 }, { 7, 5, 21 }, { 7, 7, 22 } + } ); + + VectorType inVector_4 { 1, 2, 1, 2, 1, 2, 1, 2 }; + VectorType outVector_4( m_rows_4, 0 ); + m_4.vectorProduct( inVector_4, outVector_4 ); + + EXPECT_EQ( outVector_4.getElement( 0 ), 28 ); + EXPECT_EQ( outVector_4.getElement( 1 ), 78 ); + EXPECT_EQ( outVector_4.getElement( 2 ), 34 ); + EXPECT_EQ( outVector_4.getElement( 3 ), 94 ); + EXPECT_EQ( outVector_4.getElement( 4 ), 47 ); + EXPECT_EQ( outVector_4.getElement( 5 ), 120 ); + EXPECT_EQ( outVector_4.getElement( 6 ), 66 ); + EXPECT_EQ( outVector_4.getElement( 7 ), 164 ); + + /* + * Sets up the following 8x8 sparse matrix: + * + * / 1 0 0 0 0 0 0 0 \ + * | 0 2 0 0 0 0 0 0 | + * | 0 0 3 4 6 9 0 0 | + * | 0 0 4 5 7 10 0 0 | + * | 0 0 6 7 8 11 0 0 | + * | 0 0 9 10 11 12 0 0 | + * | 0 0 0 0 0 0 13 0 | + * \ 0 0 0 0 0 0 0 14 / + */ + + const IndexType m_rows_5 = 8; + const IndexType m_cols_5 = 8; + + Matrix m_5( m_rows_5, m_cols_5,{ + { 0, 0, 1 }, + { 1, 1, 2, }, + { 2, 2, 3 }, + { 3, 2, 4 }, { 3, 3, 5 }, + { 4, 2, 6 }, { 4, 3, 7 }, { 4, 4, 8 }, + { 5, 2, 9 }, { 5, 3, 10 }, { 5, 4, 11 }, { 5, 5, 12 }, + { 6, 6, 13 }, + { 7, 7, 14 } + } ); + + VectorType inVector_5( { 1, 2, 3, 4, 5, 6, 7, 8 } ); + VectorType outVector_5( m_rows_5, 0.0 ); + m_5.vectorProduct( inVector_5, outVector_5 ); + + EXPECT_EQ( outVector_5.getElement( 0 ), 1*1 ); + EXPECT_EQ( 
outVector_5.getElement( 1 ), 2*2 ); + EXPECT_EQ( outVector_5.getElement( 2 ), 3*3 + 4*4 + 5*6 + 6*9 ); + EXPECT_EQ( outVector_5.getElement( 3 ), 3*4 + 4*5 + 5*7 + 6*10 ); + EXPECT_EQ( outVector_5.getElement( 4 ), 3*6 + 4*7 + 5*8 + 6*11 ); + EXPECT_EQ( outVector_5.getElement( 5 ), 3*9 + 4*10 + 5*11 + 6*12 ); + EXPECT_EQ( outVector_5.getElement( 6 ), 7*13 ); + EXPECT_EQ( outVector_5.getElement( 7 ), 8*14 ); +} + +template< typename Matrix > +void test_RowsReduction() +{ + using RealType = typename Matrix::RealType; + using DeviceType = typename Matrix::DeviceType; + using IndexType = typename Matrix::IndexType; + + /* + * Sets up the following 8x8 sparse matrix: + * + * / 1 0 0 0 0 0 0 0 \ + * | 0 2 0 0 0 0 0 0 | + * | 0 0 3 4 6 9 0 0 | + * | 0 0 4 5 7 10 0 0 | + * | 0 0 6 7 8 11 0 0 | + * | 0 0 9 10 11 12 0 0 | + * | 0 0 0 0 0 0 13 0 | + * \ 0 0 0 0 0 0 0 14 / + */ + + const IndexType m_rows_5 = 8; + const IndexType m_cols_5 = 8; + + Matrix m_5( m_rows_5, m_cols_5,{ + { 0, 0, 1 }, + { 1, 1, 2, }, + { 2, 2, 3 }, + { 3, 2, 4 }, { 3, 3, 5 }, + { 4, 2, 6 }, { 4, 3, 7 }, { 4, 4, 8 }, + { 5, 2, 9 }, { 5, 3, 10 }, { 5, 4, 11 }, { 5, 5, 12 }, + { 6, 6, 13 }, + { 7, 7, 14 } + } ); + + //// + // Compute number of non-zero elements in rows. + typename Matrix::RowsCapacitiesType rowLengths( m_rows_5 ); + typename Matrix::RowsCapacitiesType rowLengths_true( { 1, 1, 4, 4, 4, 4, 1, 1 } ); + auto rowLengths_view = rowLengths.getView(); + rowLengths_view = 0; + auto fetch = [=] __cuda_callable__ ( IndexType row, IndexType column, IndexType globalIdx, const RealType& value ) mutable -> IndexType { + if( value != 0.0 && row != column) + TNL::Algorithms::AtomicOperations< DeviceType >::add( rowLengths_view[ column ], ( IndexType ) 1 ); + return ( value != 0.0 ); + }; + auto reduce = [] __cuda_callable__ ( IndexType& aux, const IndexType a ) { + aux += a; + }; + auto keep = [=] __cuda_callable__ ( const IndexType rowIdx, const IndexType value ) mutable { + rowLengths_view[ rowIdx ] += value; + }; + m_5.allRowsReduction( fetch, reduce, keep, 0 ); + + EXPECT_EQ( rowLengths_true, rowLengths ); + m_5.getCompressedRowLengths( rowLengths ); + typename Matrix::RowsCapacitiesType rowLengths_symmetric( { 1, 1, 1, 2, 3, 4, 1, 1 } ); + EXPECT_EQ( rowLengths_symmetric, rowLengths ); + + //// + // Compute max norm + /*TNL::Containers::Vector< RealType, DeviceType, IndexType > rowSums( m_5.getRows() ); + auto rowSums_view = rowSums.getView(); + auto max_fetch = [] __cuda_callable__ ( IndexType row, IndexType column, IndexType globalIdx, const RealType& value ) -> IndexType { + return abs( value ); + }; + auto max_reduce = [] __cuda_callable__ ( IndexType& aux, const IndexType a ) { + aux += a; + }; + auto max_keep = [=] __cuda_callable__ ( const IndexType rowIdx, const IndexType value ) mutable { + rowSums_view[ rowIdx ] = value; + }; + m_5.allRowsReduction( max_fetch, max_reduce, max_keep, 0 ); + const RealType maxNorm = TNL::max( rowSums ); + EXPECT_EQ( maxNorm, 260 ) ; // 29+30+31+32+33+34+35+36*/ +} + +template< typename Matrix > +void test_PerformSORIteration() +{ + using RealType = typename Matrix::RealType; + using DeviceType = typename Matrix::DeviceType; + using IndexType = typename Matrix::IndexType; + + /* + * Sets up the following 4x4 sparse matrix: + * + * / 4 1 0 0 \ + * | 1 4 1 0 | + * | 0 1 4 1 | + * \ 0 0 1 4 / + */ + + const IndexType m_rows = 4; + const IndexType m_cols = 4; + + Matrix m( m_rows, m_cols, { + { 0, 0, 4 }, { 0, 1, 1 }, + { 1, 0, 1 }, { 1, 1, 4 }, { 1, 2, 1 }, + { 2, 1, 1 }, { 2, 2, 4 }, { 2, 
3, 1 }, + { 3, 2, 1 }, { 3, 3, 4 }, { 3, 4, 1 }, + { 4, 3, 1 }, { 4, 4, 4 } + } ); + + RealType bVector [ 4 ] = { 1, 1, 1, 1 }; + RealType xVector [ 4 ] = { 1, 1, 1, 1 }; + + IndexType row = 0; + RealType omega = 1; + + m.performSORIteration( bVector, row++, xVector, omega); + + EXPECT_EQ( xVector[ 0 ], 0.0 ); + EXPECT_EQ( xVector[ 1 ], 1.0 ); + EXPECT_EQ( xVector[ 2 ], 1.0 ); + EXPECT_EQ( xVector[ 3 ], 1.0 ); + + m.performSORIteration( bVector, row++, xVector, omega); + + EXPECT_EQ( xVector[ 0 ], 0.0 ); + EXPECT_EQ( xVector[ 1 ], 0.0 ); + EXPECT_EQ( xVector[ 2 ], 1.0 ); + EXPECT_EQ( xVector[ 3 ], 1.0 ); + + m.performSORIteration( bVector, row++, xVector, omega); + + EXPECT_EQ( xVector[ 0 ], 0.0 ); + EXPECT_EQ( xVector[ 1 ], 0.0 ); + EXPECT_EQ( xVector[ 2 ], 0.0 ); + EXPECT_EQ( xVector[ 3 ], 1.0 ); + + m.performSORIteration( bVector, row++, xVector, omega); + + EXPECT_EQ( xVector[ 0 ], 0.0 ); + EXPECT_EQ( xVector[ 1 ], 0.0 ); + EXPECT_EQ( xVector[ 2 ], 0.0 ); + EXPECT_EQ( xVector[ 3 ], 0.25 ); +} + +template< typename Matrix > +void test_SaveAndLoad( const char* filename ) +{ + using RealType = typename Matrix::RealType; + using DeviceType = typename Matrix::DeviceType; + using IndexType = typename Matrix::IndexType; + + /* + * Sets up the following 6x5 sparse matrix: + * + * / 1 2 0 0 0 \ + * | 2 3 4 0 0 | + * | 0 4 5 6 0 | + * | 0 0 6 7 8 | + * | 0 0 0 8 9 | + * \ 0 0 0 0 10 / + */ + + const IndexType rows = 6; + const IndexType cols = 5; + + Matrix savedMatrix( 6, 5, { + { 0, 0, 1 }, + { 1, 0, 2 }, { 1, 1, 3 }, + { 2, 1, 4 }, { 2, 2, 5 }, + { 3, 2, 6 }, { 3, 3, 7 }, + { 4, 3, 8 }, { 4, 4, 9 }, + { 5, 4, 10 } } ); + + // Check the set elements + EXPECT_EQ( savedMatrix.getElement( 0, 0 ), 1 ); + EXPECT_EQ( savedMatrix.getElement( 0, 1 ), 2 ); + EXPECT_EQ( savedMatrix.getElement( 0, 2 ), 0 ); + EXPECT_EQ( savedMatrix.getElement( 0, 3 ), 0 ); + EXPECT_EQ( savedMatrix.getElement( 0, 4 ), 0 ); + + EXPECT_EQ( savedMatrix.getElement( 1, 0 ), 2 ); + EXPECT_EQ( savedMatrix.getElement( 1, 1 ), 3 ); + EXPECT_EQ( savedMatrix.getElement( 1, 2 ), 4 ); + EXPECT_EQ( savedMatrix.getElement( 1, 3 ), 0 ); + EXPECT_EQ( savedMatrix.getElement( 1, 4 ), 0 ); + + EXPECT_EQ( savedMatrix.getElement( 2, 0 ), 0 ); + EXPECT_EQ( savedMatrix.getElement( 2, 1 ), 4 ); + EXPECT_EQ( savedMatrix.getElement( 2, 2 ), 5 ); + EXPECT_EQ( savedMatrix.getElement( 2, 3 ), 6 ); + EXPECT_EQ( savedMatrix.getElement( 2, 4 ), 0 ); + + EXPECT_EQ( savedMatrix.getElement( 3, 0 ), 0 ); + EXPECT_EQ( savedMatrix.getElement( 3, 1 ), 0 ); + EXPECT_EQ( savedMatrix.getElement( 3, 2 ), 6 ); + EXPECT_EQ( savedMatrix.getElement( 3, 3 ), 7 ); + EXPECT_EQ( savedMatrix.getElement( 3, 4 ), 8 ); + + EXPECT_EQ( savedMatrix.getElement( 4, 0 ), 0 ); + EXPECT_EQ( savedMatrix.getElement( 4, 1 ), 0 ); + EXPECT_EQ( savedMatrix.getElement( 4, 2 ), 0 ); + EXPECT_EQ( savedMatrix.getElement( 4, 3 ), 8 ); + EXPECT_EQ( savedMatrix.getElement( 4, 4 ), 9 ); + + EXPECT_EQ( savedMatrix.getElement( 5, 0 ), 0 ); + EXPECT_EQ( savedMatrix.getElement( 5, 1 ), 0 ); + EXPECT_EQ( savedMatrix.getElement( 5, 2 ), 0 ); + EXPECT_EQ( savedMatrix.getElement( 5, 3 ), 0 ); + EXPECT_EQ( savedMatrix.getElement( 5, 4 ), 10 ); + + ASSERT_NO_THROW( savedMatrix.save( filename ) ); + + Matrix loadedMatrix; + + ASSERT_NO_THROW( loadedMatrix.load( filename ) ); + + EXPECT_EQ( savedMatrix.getElement( 0, 0 ), loadedMatrix.getElement( 0, 0 ) ); + EXPECT_EQ( savedMatrix.getElement( 0, 1 ), loadedMatrix.getElement( 0, 1 ) ); + EXPECT_EQ( savedMatrix.getElement( 0, 2 ), loadedMatrix.getElement( 0, 2 
) ); + EXPECT_EQ( savedMatrix.getElement( 0, 3 ), loadedMatrix.getElement( 0, 3 ) ); + EXPECT_EQ( savedMatrix.getElement( 0, 4 ), loadedMatrix.getElement( 0, 4 ) ); + + EXPECT_EQ( savedMatrix.getElement( 1, 0 ), loadedMatrix.getElement( 1, 0 ) ); + EXPECT_EQ( savedMatrix.getElement( 1, 1 ), loadedMatrix.getElement( 1, 1 ) ); + EXPECT_EQ( savedMatrix.getElement( 1, 2 ), loadedMatrix.getElement( 1, 2 ) ); + EXPECT_EQ( savedMatrix.getElement( 1, 3 ), loadedMatrix.getElement( 1, 3 ) ); + EXPECT_EQ( savedMatrix.getElement( 1, 4 ), loadedMatrix.getElement( 1, 4 ) ); + + EXPECT_EQ( savedMatrix.getElement( 2, 0 ), loadedMatrix.getElement( 2, 0 ) ); + EXPECT_EQ( savedMatrix.getElement( 2, 1 ), loadedMatrix.getElement( 2, 1 ) ); + EXPECT_EQ( savedMatrix.getElement( 2, 2 ), loadedMatrix.getElement( 2, 2 ) ); + EXPECT_EQ( savedMatrix.getElement( 2, 3 ), loadedMatrix.getElement( 2, 3 ) ); + EXPECT_EQ( savedMatrix.getElement( 2, 4 ), loadedMatrix.getElement( 2, 4 ) ); + + EXPECT_EQ( savedMatrix.getElement( 3, 0 ), loadedMatrix.getElement( 3, 0 ) ); + EXPECT_EQ( savedMatrix.getElement( 3, 1 ), loadedMatrix.getElement( 3, 1 ) ); + EXPECT_EQ( savedMatrix.getElement( 3, 2 ), loadedMatrix.getElement( 3, 2 ) ); + EXPECT_EQ( savedMatrix.getElement( 3, 3 ), loadedMatrix.getElement( 3, 3 ) ); + EXPECT_EQ( savedMatrix.getElement( 3, 4 ), loadedMatrix.getElement( 3, 4 ) ); + + EXPECT_EQ( savedMatrix.getElement( 4, 0 ), loadedMatrix.getElement( 4, 0 ) ); + EXPECT_EQ( savedMatrix.getElement( 4, 1 ), loadedMatrix.getElement( 4, 1 ) ); + EXPECT_EQ( savedMatrix.getElement( 4, 2 ), loadedMatrix.getElement( 4, 2 ) ); + EXPECT_EQ( savedMatrix.getElement( 4, 3 ), loadedMatrix.getElement( 4, 3 ) ); + EXPECT_EQ( savedMatrix.getElement( 4, 4 ), loadedMatrix.getElement( 4, 4 ) ); + + EXPECT_EQ( savedMatrix.getElement( 5, 0 ), loadedMatrix.getElement( 5, 0 ) ); + EXPECT_EQ( savedMatrix.getElement( 5, 1 ), loadedMatrix.getElement( 5, 1 ) ); + EXPECT_EQ( savedMatrix.getElement( 5, 2 ), loadedMatrix.getElement( 5, 2 ) ); + EXPECT_EQ( savedMatrix.getElement( 5, 3 ), loadedMatrix.getElement( 5, 3 ) ); + EXPECT_EQ( savedMatrix.getElement( 5, 4 ), loadedMatrix.getElement( 5, 4 ) ); + EXPECT_EQ( std::remove( filename ), 0 ); +} + +template< typename Matrix > +void test_Print() +{ + using RealType = typename Matrix::RealType; + using DeviceType = typename Matrix::DeviceType; + using IndexType = typename Matrix::IndexType; + + /* + * Sets up the following 4x4 sparse matrix: + * + * / 4 1 0 0 \ + * | 1 4 1 0 | + * | 0 1 4 1 | + * \ 0 0 1 4 / + */ + + const IndexType m_rows = 4; + const IndexType m_cols = 4; + + Matrix m( m_rows, m_cols, { + { 0, 0, 4 }, + { 1, 0, 1 }, { 1, 1, 4 }, + { 2, 1, 1 }, { 2, 2, 4 }, + { 3, 2, 1 }, { 3, 3, 4 } + } ); + + std::stringstream printed; + std::stringstream couted; + + //change the underlying buffer and save the old buffer + auto old_buf = std::cout.rdbuf(printed.rdbuf()); + + m.print( std::cout ); //all the std::cout goes to ss + + std::cout.rdbuf(old_buf); //reset + + couted << "Row: 0 -> Col:0->4 Col:1->1\t\n" + "Row: 1 -> Col:0->1 Col:1->4 Col:2->1\t\n" + "Row: 2 -> Col:1->1 Col:2->4 Col:3->1\t\n" + "Row: 3 -> Col:2->1 Col:3->4\t\n"; + + EXPECT_EQ( printed.str(), couted.str() ); +} + +#endif diff --git a/src/UnitTests/Matrices/SymmetricSparseMatrixTest_CSR.cpp b/src/UnitTests/Matrices/SymmetricSparseMatrixTest_CSR.cpp new file mode 100644 index 0000000000000000000000000000000000000000..c23fa4242090ca3c441df81f4fbd6b1583b833d2 --- /dev/null +++ 
b/src/UnitTests/Matrices/SymmetricSparseMatrixTest_CSR.cpp @@ -0,0 +1,11 @@ +/*************************************************************************** + SymmetricSparseMatrixTest_CSR.cpp - description + ------------------- + begin : Feb 11, 2020 + copyright : (C) 2020 by Tomas Oberhuber et al. + email : tomas.oberhuber@fjfi.cvut.cz + ***************************************************************************/ + +/* See Copyright Notice in tnl/Copyright */ + +#include "SymmetricSparseMatrixTest_CSR.h" diff --git a/src/UnitTests/Matrices/SymmetricSparseMatrixTest_CSR.cu b/src/UnitTests/Matrices/SymmetricSparseMatrixTest_CSR.cu new file mode 100644 index 0000000000000000000000000000000000000000..df1d83da0e08cefc0bc314e01ec216bda1905f4a --- /dev/null +++ b/src/UnitTests/Matrices/SymmetricSparseMatrixTest_CSR.cu @@ -0,0 +1,11 @@ +/*************************************************************************** + SymmetricSparseMatrixTest_CSR.cu - description + ------------------- + begin : Feb 11, 2020 + copyright : (C) 2020 by Tomas Oberhuber et al. + email : tomas.oberhuber@fjfi.cvut.cz + ***************************************************************************/ + +/* See Copyright Notice in tnl/Copyright */ + +#include "SymmetricSparseMatrixTest_CSR.h" diff --git a/src/UnitTests/Matrices/SymmetricSparseMatrixTest_CSR.h b/src/UnitTests/Matrices/SymmetricSparseMatrixTest_CSR.h new file mode 100644 index 0000000000000000000000000000000000000000..f6f7ec95a3a93aa6917d88a0f014ddbc44ca92ef --- /dev/null +++ b/src/UnitTests/Matrices/SymmetricSparseMatrixTest_CSR.h @@ -0,0 +1,61 @@ +/*************************************************************************** + SymmetricSparseMatrixTest_CSR.h - description + ------------------- + begin : Feb 11, 2020 + copyright : (C) 2020 by Tomas Oberhuber et al. 
+ email : tomas.oberhuber@fjfi.cvut.cz + ***************************************************************************/ + +/* See Copyright Notice in tnl/Copyright */ + +#ifdef HAVE_GTEST +#include <gtest/gtest.h> +#include <TNL/Containers/Segments/CSR.h> +#include <TNL/Matrices/SparseMatrix.h> + +// test fixture for typed tests +//template< typename Matrix > +//class MatrixTest : public ::testing::Test +//{ +//protected: +// using MatrixType = Matrix; +//}; + +// types for which MatrixTest is instantiated +using MatrixTypes = ::testing::Types +< + TNL::Matrices::SparseMatrix< int, TNL::Devices::Host, short, TNL::Matrices::SymmetricMatrix, TNL::Containers::Segments::CSR >, + TNL::Matrices::SparseMatrix< long, TNL::Devices::Host, short, TNL::Matrices::SymmetricMatrix, TNL::Containers::Segments::CSR >, + TNL::Matrices::SparseMatrix< float, TNL::Devices::Host, short, TNL::Matrices::SymmetricMatrix, TNL::Containers::Segments::CSR >, + TNL::Matrices::SparseMatrix< double, TNL::Devices::Host, short, TNL::Matrices::SymmetricMatrix, TNL::Containers::Segments::CSR >, + TNL::Matrices::SparseMatrix< int, TNL::Devices::Host, int, TNL::Matrices::SymmetricMatrix, TNL::Containers::Segments::CSR >, + TNL::Matrices::SparseMatrix< long, TNL::Devices::Host, int, TNL::Matrices::SymmetricMatrix, TNL::Containers::Segments::CSR >, + TNL::Matrices::SparseMatrix< float, TNL::Devices::Host, int, TNL::Matrices::SymmetricMatrix, TNL::Containers::Segments::CSR >, + TNL::Matrices::SparseMatrix< double, TNL::Devices::Host, int, TNL::Matrices::SymmetricMatrix, TNL::Containers::Segments::CSR >, + TNL::Matrices::SparseMatrix< int, TNL::Devices::Host, long, TNL::Matrices::SymmetricMatrix, TNL::Containers::Segments::CSR >, + TNL::Matrices::SparseMatrix< long, TNL::Devices::Host, long, TNL::Matrices::SymmetricMatrix, TNL::Containers::Segments::CSR >, + TNL::Matrices::SparseMatrix< float, TNL::Devices::Host, long, TNL::Matrices::SymmetricMatrix, TNL::Containers::Segments::CSR >, + TNL::Matrices::SparseMatrix< double, TNL::Devices::Host, long, TNL::Matrices::SymmetricMatrix, TNL::Containers::Segments::CSR > +#ifdef HAVE_CUDA // Commented types are not supported by atomic operations on GPU. 
+ ,//TNL::Matrices::SparseMatrix< int, TNL::Devices::Cuda, short, TNL::Matrices::SymmetricMatrix, TNL::Containers::Segments::CSR >, + //TNL::Matrices::SparseMatrix< long, TNL::Devices::Cuda, short, TNL::Matrices::SymmetricMatrix, TNL::Containers::Segments::CSR >, + //TNL::Matrices::SparseMatrix< float, TNL::Devices::Cuda, short, TNL::Matrices::SymmetricMatrix, TNL::Containers::Segments::CSR >, + //TNL::Matrices::SparseMatrix< double, TNL::Devices::Cuda, short, TNL::Matrices::SymmetricMatrix, TNL::Containers::Segments::CSR >, + TNL::Matrices::SparseMatrix< int, TNL::Devices::Cuda, int, TNL::Matrices::SymmetricMatrix, TNL::Containers::Segments::CSR >, + //TNL::Matrices::SparseMatrix< long, TNL::Devices::Cuda, int, TNL::Matrices::SymmetricMatrix, TNL::Containers::Segments::CSR >, + TNL::Matrices::SparseMatrix< float, TNL::Devices::Cuda, int, TNL::Matrices::SymmetricMatrix, TNL::Containers::Segments::CSR >, + TNL::Matrices::SparseMatrix< double, TNL::Devices::Cuda, int, TNL::Matrices::SymmetricMatrix, TNL::Containers::Segments::CSR >, + //TNL::Matrices::SparseMatrix< int, TNL::Devices::Cuda, long, TNL::Matrices::SymmetricMatrix, TNL::Containers::Segments::CSR >, + //TNL::Matrices::SparseMatrix< long, TNL::Devices::Cuda, long, TNL::Matrices::SymmetricMatrix, TNL::Containers::Segments::CSR >, + //TNL::Matrices::SparseMatrix< float, TNL::Devices::Cuda, long, TNL::Matrices::SymmetricMatrix, TNL::Containers::Segments::CSR >, + //TNL::Matrices::SparseMatrix< double, TNL::Devices::Cuda, long, TNL::Matrices::SymmetricMatrix, TNL::Containers::Segments::CSR > +#endif // HAVE_CUDA +>; + +const char* saveAndLoadTestFileName = "test_SymmetricSparseMatrixTest_CSR_segments"; + +#include "SymmetricSparseMatrixTest.h" + +#endif // HAVE_GTEST + +#include "../main.h" diff --git a/src/UnitTests/Matrices/TridiagonalMatrixTest.cpp b/src/UnitTests/Matrices/TridiagonalMatrixTest.cpp new file mode 100644 index 0000000000000000000000000000000000000000..3248d352654f119d3a8d6cb2290e92b320d0dc97 --- /dev/null +++ b/src/UnitTests/Matrices/TridiagonalMatrixTest.cpp @@ -0,0 +1,11 @@ +/*************************************************************************** + TridiagonalMatrixTest.cpp - description + ------------------- + begin : Jan 8, 2020 + copyright : (C) 2020 by Tomas Oberhuber et al. + email : tomas.oberhuber@fjfi.cvut.cz + ***************************************************************************/ + +/* See Copyright Notice in tnl/Copyright */ + +#include "TridiagonalMatrixTest.h" \ No newline at end of file diff --git a/src/UnitTests/Matrices/TridiagonalMatrixTest.cu b/src/UnitTests/Matrices/TridiagonalMatrixTest.cu new file mode 100644 index 0000000000000000000000000000000000000000..16f909fa78a3725ee9040299be7fe2ec6908514d --- /dev/null +++ b/src/UnitTests/Matrices/TridiagonalMatrixTest.cu @@ -0,0 +1,11 @@ +/*************************************************************************** + TridiagonalMatrixTest.cu - description + ------------------- + begin : Jan 8, 2020 + copyright : (C) 2020 by Tomas Oberhuber et al. 
+ email : tomas.oberhuber@fjfi.cvut.cz + ***************************************************************************/ + +/* See Copyright Notice in tnl/Copyright */ + +#include "TridiagonalMatrixTest.h" \ No newline at end of file diff --git a/src/UnitTests/Matrices/TridiagonalMatrixTest.h b/src/UnitTests/Matrices/TridiagonalMatrixTest.h new file mode 100644 index 0000000000000000000000000000000000000000..d9dc06599981ca920780bfe52e35e09e8f65f854 --- /dev/null +++ b/src/UnitTests/Matrices/TridiagonalMatrixTest.h @@ -0,0 +1,1636 @@ +/*************************************************************************** + TridiagonalMatrixTest.h - description + ------------------- + begin : Jan 8, 2020 + copyright : (C) 2020 by Tomas Oberhuber et al. + email : tomas.oberhuber@fjfi.cvut.cz + ***************************************************************************/ + +/* See Copyright Notice in tnl/Copyright */ + +#include <sstream> +#include <TNL/Devices/Host.h> +#include <TNL/Matrices/Matrix.h> +#include <TNL/Matrices/Tridiagonal.h> +#include <TNL/Containers/Array.h> + +#include <TNL/Containers/Vector.h> +#include <TNL/Containers/VectorView.h> +#include <TNL/Math.h> +#include <iostream> + +using Tridiagonal_host_float = TNL::Matrices::Tridiagonal< float, TNL::Devices::Host, int >; +using Tridiagonal_host_int = TNL::Matrices::Tridiagonal< int, TNL::Devices::Host, int >; + +using Tridiagonal_cuda_float = TNL::Matrices::Tridiagonal< float, TNL::Devices::Cuda, int >; +using Tridiagonal_cuda_int = TNL::Matrices::Tridiagonal< int, TNL::Devices::Cuda, int >; + +static const char* TEST_FILE_NAME = "test_TridiagonalMatrixTest.tnl"; + +#ifdef HAVE_GTEST +#include <type_traits> + +#include <gtest/gtest.h> + +void test_GetSerializationType() +{ + EXPECT_EQ( ( TNL::Matrices::Tridiagonal< float, TNL::Devices::Host, int, true >::getSerializationType() ), TNL::String( "Matrices::Tridiagonal< float, [any_device], int, true, [any_allocator] >" ) ); + EXPECT_EQ( ( TNL::Matrices::Tridiagonal< int, TNL::Devices::Host, int, true >::getSerializationType() ), TNL::String( "Matrices::Tridiagonal< int, [any_device], int, true, [any_allocator] >" ) ); + EXPECT_EQ( ( TNL::Matrices::Tridiagonal< float, TNL::Devices::Cuda, int, true >::getSerializationType() ), TNL::String( "Matrices::Tridiagonal< float, [any_device], int, true, [any_allocator] >" ) ); + EXPECT_EQ( ( TNL::Matrices::Tridiagonal< int, TNL::Devices::Cuda, int, true >::getSerializationType() ), TNL::String( "Matrices::Tridiagonal< int, [any_device], int, true, [any_allocator] >" ) ); + EXPECT_EQ( ( TNL::Matrices::Tridiagonal< float, TNL::Devices::Host, int, false >::getSerializationType() ), TNL::String( "Matrices::Tridiagonal< float, [any_device], int, false, [any_allocator] >" ) ); + EXPECT_EQ( ( TNL::Matrices::Tridiagonal< int, TNL::Devices::Host, int, false >::getSerializationType() ), TNL::String( "Matrices::Tridiagonal< int, [any_device], int, false, [any_allocator] >" ) ); + EXPECT_EQ( ( TNL::Matrices::Tridiagonal< float, TNL::Devices::Cuda, int, false >::getSerializationType() ), TNL::String( "Matrices::Tridiagonal< float, [any_device], int, false, [any_allocator] >" ) ); + EXPECT_EQ( ( TNL::Matrices::Tridiagonal< int, TNL::Devices::Cuda, int, false >::getSerializationType() ), TNL::String( "Matrices::Tridiagonal< int, [any_device], int, false, [any_allocator] >" ) ); +} + +template< typename Matrix > +void test_SetDimensions() +{ + using RealType = typename Matrix::RealType; + using DeviceType = typename Matrix::DeviceType; + using IndexType = typename 
Matrix::IndexType; + + const IndexType rows = 9; + const IndexType cols = 8; + + Matrix m; + m.setDimensions( rows, cols ); + + EXPECT_EQ( m.getRows(), 9 ); + EXPECT_EQ( m.getColumns(), 8 ); +} + +template< typename Matrix1, typename Matrix2 > +void test_SetLike() +{ + using RealType = typename Matrix1::RealType; + using DeviceType = typename Matrix1::DeviceType; + using IndexType = typename Matrix1::IndexType; + + const IndexType rows = 8; + const IndexType cols = 7; + + Matrix1 m1; + m1.reset(); + m1.setDimensions( rows + 1, cols + 2 ); + + Matrix2 m2; + m2.reset(); + m2.setDimensions( rows, cols ); + + m1.setLike( m2 ); + + EXPECT_EQ( m1.getRows(), m2.getRows() ); + EXPECT_EQ( m1.getColumns(), m2.getColumns() ); +} + +template< typename Matrix > +void test_GetCompressedRowLengths() +{ + using RealType = typename Matrix::RealType; + using DeviceType = typename Matrix::DeviceType; + using IndexType = typename Matrix::IndexType; + + const IndexType rows = 10; + const IndexType cols = 11; + + Matrix m( rows, cols ); + + // Insert values into the rows. + RealType value = 1; + + for( IndexType i = 0; i < 2; i++ ) // 0th row -> 2 elements + m.setElement( 0, i, value++ ); + + for( IndexType i = 0; i < 3; i++ ) // 1st row -> 3 elements + m.setElement( 1, i, value++ ); + + for( IndexType i = 1; i < 3; i++ ) // 2nd row -> 2 elements + m.setElement( 2, i, value++ ); + + for( IndexType i = 2; i < 5; i++ ) // 3rd row -> 3 elements + m.setElement( 3, i, value++ ); + + for( IndexType i = 3; i < 6; i++ ) // 4th row -> 3 elements + m.setElement( 4, i, value++ ); + + for( IndexType i = 4; i < 6; i++ ) // 5th row -> 2 elements + m.setElement( 5, i, value++ ); + + for( IndexType i = 5; i < 8; i++ ) // 6th row -> 3 elements + m.setElement( 6, i, value++ ); + + for( IndexType i = 6; i < 8; i++ ) // 7th row -> 2 elements + m.setElement( 7, i, value++ ); + + for( IndexType i = 7; i < 10; i++ ) // 8th row -> 3 elements + m.setElement( 8, i, value++ ); + + for( IndexType i = 8; i < 11; i++ ) // 9th row -> 3 elements + m.setElement( 9, i, value++ ); + + typename Matrix::CompressedRowLengthsVector rowLengths( rows ); + rowLengths = 0; + m.getCompressedRowLengths( rowLengths ); + typename Matrix::CompressedRowLengthsVector correctRowLengths{ 2, 3, 2, 3, 3, 2, 3, 2, 3, 3 }; + EXPECT_EQ( rowLengths, correctRowLengths ); +} + +template< typename Matrix > +void test_GetRowLength() +{ + using RealType = typename Matrix::RealType; + using DeviceType = typename Matrix::DeviceType; + using IndexType = typename Matrix::IndexType; + + const IndexType rows = 8; + const IndexType cols = 7; + + Matrix m( rows, cols ); + + EXPECT_EQ( m.getRowLength( 0 ), 2 ); + EXPECT_EQ( m.getRowLength( 1 ), 3 ); + EXPECT_EQ( m.getRowLength( 2 ), 3 ); + EXPECT_EQ( m.getRowLength( 3 ), 3 ); + EXPECT_EQ( m.getRowLength( 4 ), 3 ); + EXPECT_EQ( m.getRowLength( 5 ), 3 ); + EXPECT_EQ( m.getRowLength( 6 ), 2 ); + EXPECT_EQ( m.getRowLength( 7 ), 1 ); +} + +template< typename Matrix > +void test_GetAllocatedElementsCount() +{ + using RealType = typename Matrix::RealType; + using DeviceType = typename Matrix::DeviceType; + using IndexType = typename Matrix::IndexType; + + const IndexType rows = 7; + const IndexType cols = 6; + + Matrix m( rows, cols ); + + EXPECT_EQ( m.getAllocatedElementsCount(), 21 ); +} + +template< typename Matrix > +void test_GetNumberOfNonzeroMatrixElements() +{ + using RealType = typename Matrix::RealType; + using DeviceType = typename Matrix::DeviceType; + using IndexType = typename Matrix::IndexType; + + /* + * Sets up the 
following 7x6 matrix: + * + * / 0 1 0 0 0 0 \ + * | 2 3 4 0 0 0 | + * | 0 5 6 7 0 0 | + * | 0 0 8 9 10 0 | + * | 0 0 0 11 12 13 | + * | 0 0 0 0 14 0 | + * \ 0 0 0 0 0 16 / + */ + const IndexType rows = 7; + const IndexType cols = 6; + + Matrix m( rows, cols ); + + RealType value = 0; + for( IndexType i = 0; i < rows; i++ ) + for( IndexType j = TNL::max( 0, i - 1 ); j < TNL::min( cols, i + 2 ); j++ ) + m.setElement( i, j, value++ ); + + m.setElement( 5, 5, 0); + + EXPECT_EQ( m.getNumberOfNonzeroMatrixElements(), 15 ); +} + +template< typename Matrix > +void test_Reset() +{ + using RealType = typename Matrix::RealType; + using DeviceType = typename Matrix::DeviceType; + using IndexType = typename Matrix::IndexType; + + /* + * Sets up the following 5x4 matrix: + * + * / 0 0 0 0 \ + * | 0 0 0 0 | + * | 0 0 0 0 | + * | 0 0 0 0 | + * \ 0 0 0 0 / + */ + const IndexType rows = 5; + const IndexType cols = 4; + + Matrix m( rows, cols ); + + m.reset(); + + EXPECT_EQ( m.getRows(), 0 ); + EXPECT_EQ( m.getColumns(), 0 ); +} + +template< typename Matrix > +void test_SetValue() +{ + using RealType = typename Matrix::RealType; + using DeviceType = typename Matrix::DeviceType; + using IndexType = typename Matrix::IndexType; + + /* + * Sets up the following 7x6 matrix: + * + * / 0 1 0 0 0 0 \ + * | 2 3 4 0 0 0 | + * | 0 5 6 7 0 0 | + * | 0 0 8 9 10 0 | + * | 0 0 0 11 12 13 | + * | 0 0 0 0 14 0 | + * \ 0 0 0 0 0 16 / + */ + const IndexType rows = 7; + const IndexType cols = 6; + + Matrix m( rows, cols ); + + RealType value = 0; + for( IndexType i = 0; i < rows; i++ ) + for( IndexType j = TNL::max( 0, i - 1 ); j < TNL::min( cols, i + 2 ); j++ ) + m.setElement( i, j, value++ ); + + m.setElement( 5, 5, 0); + + EXPECT_EQ( m.getElement( 0, 0 ), 0 ); + EXPECT_EQ( m.getElement( 0, 1 ), 1 ); + EXPECT_EQ( m.getElement( 0, 2 ), 0 ); + EXPECT_EQ( m.getElement( 0, 3 ), 0 ); + EXPECT_EQ( m.getElement( 0, 4 ), 0 ); + EXPECT_EQ( m.getElement( 0, 5 ), 0 ); + + EXPECT_EQ( m.getElement( 1, 0 ), 2 ); + EXPECT_EQ( m.getElement( 1, 1 ), 3 ); + EXPECT_EQ( m.getElement( 1, 2 ), 4 ); + EXPECT_EQ( m.getElement( 1, 3 ), 0 ); + EXPECT_EQ( m.getElement( 1, 4 ), 0 ); + EXPECT_EQ( m.getElement( 1, 5 ), 0 ); + + EXPECT_EQ( m.getElement( 2, 0 ), 0 ); + EXPECT_EQ( m.getElement( 2, 1 ), 5 ); + EXPECT_EQ( m.getElement( 2, 2 ), 6 ); + EXPECT_EQ( m.getElement( 2, 3 ), 7 ); + EXPECT_EQ( m.getElement( 2, 4 ), 0 ); + EXPECT_EQ( m.getElement( 2, 5 ), 0 ); + + EXPECT_EQ( m.getElement( 3, 0 ), 0 ); + EXPECT_EQ( m.getElement( 3, 1 ), 0 ); + EXPECT_EQ( m.getElement( 3, 2 ), 8 ); + EXPECT_EQ( m.getElement( 3, 3 ), 9 ); + EXPECT_EQ( m.getElement( 3, 4 ), 10 ); + EXPECT_EQ( m.getElement( 3, 5 ), 0 ); + + EXPECT_EQ( m.getElement( 4, 0 ), 0 ); + EXPECT_EQ( m.getElement( 4, 1 ), 0 ); + EXPECT_EQ( m.getElement( 4, 2 ), 0 ); + EXPECT_EQ( m.getElement( 4, 3 ), 11 ); + EXPECT_EQ( m.getElement( 4, 4 ), 12 ); + EXPECT_EQ( m.getElement( 4, 5 ), 13 ); + + EXPECT_EQ( m.getElement( 5, 0 ), 0 ); + EXPECT_EQ( m.getElement( 5, 1 ), 0 ); + EXPECT_EQ( m.getElement( 5, 2 ), 0 ); + EXPECT_EQ( m.getElement( 5, 3 ), 0 ); + EXPECT_EQ( m.getElement( 5, 4 ), 14 ); + EXPECT_EQ( m.getElement( 5, 5 ), 0 ); + + EXPECT_EQ( m.getElement( 6, 0 ), 0 ); + EXPECT_EQ( m.getElement( 6, 1 ), 0 ); + EXPECT_EQ( m.getElement( 6, 2 ), 0 ); + EXPECT_EQ( m.getElement( 6, 3 ), 0 ); + EXPECT_EQ( m.getElement( 6, 4 ), 0 ); + EXPECT_EQ( m.getElement( 6, 5 ), 16 ); + + // Set the values of all elements to a certain number + m.setValue( 42 ); + + EXPECT_EQ( m.getElement( 0, 0 ), 42 ); + EXPECT_EQ( 
m.getElement( 0, 1 ), 42 ); + EXPECT_EQ( m.getElement( 0, 2 ), 0 ); + EXPECT_EQ( m.getElement( 0, 3 ), 0 ); + EXPECT_EQ( m.getElement( 0, 4 ), 0 ); + EXPECT_EQ( m.getElement( 0, 5 ), 0 ); + + EXPECT_EQ( m.getElement( 1, 0 ), 42 ); + EXPECT_EQ( m.getElement( 1, 1 ), 42 ); + EXPECT_EQ( m.getElement( 1, 2 ), 42 ); + EXPECT_EQ( m.getElement( 1, 3 ), 0 ); + EXPECT_EQ( m.getElement( 1, 4 ), 0 ); + EXPECT_EQ( m.getElement( 1, 5 ), 0 ); + + EXPECT_EQ( m.getElement( 2, 0 ), 0 ); + EXPECT_EQ( m.getElement( 2, 1 ), 42 ); + EXPECT_EQ( m.getElement( 2, 2 ), 42 ); + EXPECT_EQ( m.getElement( 2, 3 ), 42 ); + EXPECT_EQ( m.getElement( 2, 4 ), 0 ); + EXPECT_EQ( m.getElement( 2, 5 ), 0 ); + + EXPECT_EQ( m.getElement( 3, 0 ), 0 ); + EXPECT_EQ( m.getElement( 3, 1 ), 0 ); + EXPECT_EQ( m.getElement( 3, 2 ), 42 ); + EXPECT_EQ( m.getElement( 3, 3 ), 42 ); + EXPECT_EQ( m.getElement( 3, 4 ), 42 ); + EXPECT_EQ( m.getElement( 3, 5 ), 0 ); + + EXPECT_EQ( m.getElement( 4, 0 ), 0 ); + EXPECT_EQ( m.getElement( 4, 1 ), 0 ); + EXPECT_EQ( m.getElement( 4, 2 ), 0 ); + EXPECT_EQ( m.getElement( 4, 3 ), 42 ); + EXPECT_EQ( m.getElement( 4, 4 ), 42 ); + EXPECT_EQ( m.getElement( 4, 5 ), 42 ); + + EXPECT_EQ( m.getElement( 5, 0 ), 0 ); + EXPECT_EQ( m.getElement( 5, 1 ), 0 ); + EXPECT_EQ( m.getElement( 5, 2 ), 0 ); + EXPECT_EQ( m.getElement( 5, 3 ), 0 ); + EXPECT_EQ( m.getElement( 5, 4 ), 42 ); + EXPECT_EQ( m.getElement( 5, 5 ), 42 ); + + EXPECT_EQ( m.getElement( 6, 0 ), 0 ); + EXPECT_EQ( m.getElement( 6, 1 ), 0 ); + EXPECT_EQ( m.getElement( 6, 2 ), 0 ); + EXPECT_EQ( m.getElement( 6, 3 ), 0 ); + EXPECT_EQ( m.getElement( 6, 4 ), 0 ); + EXPECT_EQ( m.getElement( 6, 5 ), 42 ); +} + +template< typename Matrix > +void test_SetElement() +{ + using RealType = typename Matrix::RealType; + using DeviceType = typename Matrix::DeviceType; + using IndexType = typename Matrix::IndexType; + + /* + * Sets up the following 5x5 matrix: + * + * / 1 2 0 0 0 \ + * | 6 7 8 0 0 | + * | 0 12 13 14 0 | + * | 0 0 18 19 20 | + * \ 0 0 0 24 25 / + */ + const IndexType rows = 5; + const IndexType cols = 5; + + Matrix m( rows, cols ); + + RealType value = 1; + for( IndexType i = 0; i < rows; i++ ) + for( IndexType j = 0; j < cols; j++ ) + { + if( abs( i - j ) > 1 ) + { + EXPECT_THROW( m.setElement( i, j, value++ ), std::logic_error ); + } + else + m.setElement( i, j, value++ ); + } + + EXPECT_EQ( m.getElement( 0, 0 ), 1 ); + EXPECT_EQ( m.getElement( 0, 1 ), 2 ); + EXPECT_EQ( m.getElement( 0, 2 ), 0 ); + EXPECT_EQ( m.getElement( 0, 3 ), 0 ); + EXPECT_EQ( m.getElement( 0, 4 ), 0 ); + + EXPECT_EQ( m.getElement( 1, 0 ), 6 ); + EXPECT_EQ( m.getElement( 1, 1 ), 7 ); + EXPECT_EQ( m.getElement( 1, 2 ), 8 ); + EXPECT_EQ( m.getElement( 1, 3 ), 0 ); + EXPECT_EQ( m.getElement( 1, 4 ), 0 ); + + EXPECT_EQ( m.getElement( 2, 0 ), 0 ); + EXPECT_EQ( m.getElement( 2, 1 ), 12 ); + EXPECT_EQ( m.getElement( 2, 2 ), 13 ); + EXPECT_EQ( m.getElement( 2, 3 ), 14 ); + EXPECT_EQ( m.getElement( 2, 4 ), 0 ); + + EXPECT_EQ( m.getElement( 3, 0 ), 0 ); + EXPECT_EQ( m.getElement( 3, 1 ), 0 ); + EXPECT_EQ( m.getElement( 3, 2 ), 18 ); + EXPECT_EQ( m.getElement( 3, 3 ), 19 ); + EXPECT_EQ( m.getElement( 3, 4 ), 20 ); + + EXPECT_EQ( m.getElement( 4, 0 ), 0 ); + EXPECT_EQ( m.getElement( 4, 1 ), 0 ); + EXPECT_EQ( m.getElement( 4, 2 ), 0 ); + EXPECT_EQ( m.getElement( 4, 3 ), 24 ); + EXPECT_EQ( m.getElement( 4, 4 ), 25 ); +} + +template< typename Matrix > +void test_AddElement() +{ + using RealType = typename Matrix::RealType; + using DeviceType = typename Matrix::DeviceType; + using IndexType = typename 
Matrix::IndexType; + + /* + * Sets up the following 6x5 matrix: + * + * / 1 2 0 0 0 \ + * | 6 7 8 0 0 | + * | 0 12 13 14 0 | + * | 0 0 18 19 20 | + * | 0 0 0 24 25 | + * \ 0 0 0 0 30 / + */ + + const IndexType rows = 6; + const IndexType cols = 5; + + Matrix m( rows, cols ); + + RealType value = 1; + for( IndexType i = 0; i < rows; i++ ) + for( IndexType j = 0; j < cols; j++ ) + { + if( abs( i - j ) <= 1 ) + m.setElement( i, j, value ); + value++; + } + + // Check the added elements + EXPECT_EQ( m.getElement( 0, 0 ), 1 ); + EXPECT_EQ( m.getElement( 0, 1 ), 2 ); + EXPECT_EQ( m.getElement( 0, 2 ), 0 ); + EXPECT_EQ( m.getElement( 0, 3 ), 0 ); + EXPECT_EQ( m.getElement( 0, 4 ), 0 ); + + EXPECT_EQ( m.getElement( 1, 0 ), 6 ); + EXPECT_EQ( m.getElement( 1, 1 ), 7 ); + EXPECT_EQ( m.getElement( 1, 2 ), 8 ); + EXPECT_EQ( m.getElement( 1, 3 ), 0 ); + EXPECT_EQ( m.getElement( 1, 4 ), 0 ); + + EXPECT_EQ( m.getElement( 2, 0 ), 0 ); + EXPECT_EQ( m.getElement( 2, 1 ), 12 ); + EXPECT_EQ( m.getElement( 2, 2 ), 13 ); + EXPECT_EQ( m.getElement( 2, 3 ), 14 ); + EXPECT_EQ( m.getElement( 2, 4 ), 0 ); + + EXPECT_EQ( m.getElement( 3, 0 ), 0 ); + EXPECT_EQ( m.getElement( 3, 1 ), 0 ); + EXPECT_EQ( m.getElement( 3, 2 ), 18 ); + EXPECT_EQ( m.getElement( 3, 3 ), 19 ); + EXPECT_EQ( m.getElement( 3, 4 ), 20 ); + + EXPECT_EQ( m.getElement( 4, 0 ), 0 ); + EXPECT_EQ( m.getElement( 4, 1 ), 0 ); + EXPECT_EQ( m.getElement( 4, 2 ), 0 ); + EXPECT_EQ( m.getElement( 4, 3 ), 24 ); + EXPECT_EQ( m.getElement( 4, 4 ), 25 ); + + EXPECT_EQ( m.getElement( 5, 0 ), 0 ); + EXPECT_EQ( m.getElement( 5, 1 ), 0 ); + EXPECT_EQ( m.getElement( 5, 2 ), 0 ); + EXPECT_EQ( m.getElement( 5, 3 ), 0 ); + EXPECT_EQ( m.getElement( 5, 4 ), 30 ); + + // Add new elements to the old elements with a multiplying factor applied to the old elements. 
+ /* + * The following setup results in the following 6x5 matrix: + * + * / 1 2 0 0 0 \ / 1 2 0 0 0 \ / 3 6 0 0 0 \ + * | 6 7 8 0 0 | | 3 4 5 0 0 | | 15 18 21 0 0 | + * 2 * | 0 12 13 14 0 | + | 0 6 7 8 0 | = | 0 30 33 36 0 | + * | 0 0 18 19 20 | | 0 0 9 10 11 | | 0 0 45 48 51 | + * | 0 0 0 24 25 | | 0 0 0 12 13 | | 0 0 0 60 63 | + * \ 0 0 0 0 30 / \ 0 0 0 0 14 / \ 0 0 0 0 74 / + */ + + RealType newValue = 1; + RealType multiplicator = 2; + for( IndexType i = 0; i < rows; i++ ) + for( IndexType j = 0; j < cols; j++ ) + if( abs( i - j ) <= 1 ) + m.addElement( i, j, newValue++, multiplicator ); + + EXPECT_EQ( m.getElement( 0, 0 ), 3 ); + EXPECT_EQ( m.getElement( 0, 1 ), 6 ); + EXPECT_EQ( m.getElement( 0, 2 ), 0 ); + EXPECT_EQ( m.getElement( 0, 3 ), 0 ); + EXPECT_EQ( m.getElement( 0, 4 ), 0 ); + + EXPECT_EQ( m.getElement( 1, 0 ), 15 ); + EXPECT_EQ( m.getElement( 1, 1 ), 18 ); + EXPECT_EQ( m.getElement( 1, 2 ), 21 ); + EXPECT_EQ( m.getElement( 1, 3 ), 0 ); + EXPECT_EQ( m.getElement( 1, 4 ), 0 ); + + EXPECT_EQ( m.getElement( 2, 0 ), 0 ); + EXPECT_EQ( m.getElement( 2, 1 ), 30 ); + EXPECT_EQ( m.getElement( 2, 2 ), 33 ); + EXPECT_EQ( m.getElement( 2, 3 ), 36 ); + EXPECT_EQ( m.getElement( 2, 4 ), 0 ); + + EXPECT_EQ( m.getElement( 3, 0 ), 0 ); + EXPECT_EQ( m.getElement( 3, 1 ), 0 ); + EXPECT_EQ( m.getElement( 3, 2 ), 45 ); + EXPECT_EQ( m.getElement( 3, 3 ), 48 ); + EXPECT_EQ( m.getElement( 3, 4 ), 51 ); + + EXPECT_EQ( m.getElement( 4, 0 ), 0 ); + EXPECT_EQ( m.getElement( 4, 1 ), 0 ); + EXPECT_EQ( m.getElement( 4, 2 ), 0 ); + EXPECT_EQ( m.getElement( 4, 3 ), 60 ); + EXPECT_EQ( m.getElement( 4, 4 ), 63 ); + + EXPECT_EQ( m.getElement( 5, 0 ), 0 ); + EXPECT_EQ( m.getElement( 5, 1 ), 0 ); + EXPECT_EQ( m.getElement( 5, 2 ), 0 ); + EXPECT_EQ( m.getElement( 5, 3 ), 0 ); + EXPECT_EQ( m.getElement( 5, 4 ), 74 ); +} + +template< typename Matrix > +void test_SetRow() +{ + using RealType = typename Matrix::RealType; + using DeviceType = typename Matrix::DeviceType; + using IndexType = typename Matrix::IndexType; + + /* + * Sets up the following 3x7 matrix: + * + * / 1 2 0 0 0 0 0 \ + * | 8 9 10 0 0 0 0 | + * \ 0 16 17 18 0 0 0 / + */ + const IndexType rows = 3; + const IndexType cols = 7; + + Matrix m( rows, cols ); + + auto matrix_view = m.getView(); + auto f = [=] __cuda_callable__ ( IndexType rowIdx ) mutable { + RealType values[ 3 ][ 3 ] { + { 1, 2, 0 }, + { 8, 9, 10 }, + { 16, 17, 18 } }; + auto row = matrix_view.getRow( rowIdx ); + for( IndexType i = 0; i < 3; i++ ) + { + if( rowIdx == 0 && i > 1 ) + break; + row.setElement( i, values[ rowIdx ][ i ] ); + } + }; + TNL::Algorithms::ParallelFor< DeviceType >::exec( 0, 3, f ); + + EXPECT_EQ( m.getElement( 0, 0 ), 1 ); + EXPECT_EQ( m.getElement( 0, 1 ), 2 ); + EXPECT_EQ( m.getElement( 0, 2 ), 0 ); + EXPECT_EQ( m.getElement( 0, 3 ), 0 ); + EXPECT_EQ( m.getElement( 0, 4 ), 0 ); + EXPECT_EQ( m.getElement( 0, 5 ), 0 ); + EXPECT_EQ( m.getElement( 0, 6 ), 0 ); + + EXPECT_EQ( m.getElement( 1, 0 ), 8 ); + EXPECT_EQ( m.getElement( 1, 1 ), 9 ); + EXPECT_EQ( m.getElement( 1, 2 ), 10 ); + EXPECT_EQ( m.getElement( 1, 3 ), 0 ); + EXPECT_EQ( m.getElement( 1, 4 ), 0 ); + EXPECT_EQ( m.getElement( 1, 5 ), 0 ); + EXPECT_EQ( m.getElement( 1, 6 ), 0 ); + + EXPECT_EQ( m.getElement( 2, 0 ), 0 ); + EXPECT_EQ( m.getElement( 2, 1 ), 16 ); + EXPECT_EQ( m.getElement( 2, 2 ), 17 ); + EXPECT_EQ( m.getElement( 2, 3 ), 18 ); + EXPECT_EQ( m.getElement( 2, 4 ), 0 ); + EXPECT_EQ( m.getElement( 2, 5 ), 0 ); + EXPECT_EQ( m.getElement( 2, 6 ), 0 ); +} + +template< typename Matrix > +void 
test_AddRow() +{ + using RealType = typename Matrix::RealType; + using DeviceType = typename Matrix::DeviceType; + using IndexType = typename Matrix::IndexType; + /* + * Sets up the following 6x5 matrix: + * + * / 1 2 0 0 0 \ + * | 6 7 8 0 0 | + * | 0 12 13 14 0 | + * | 0 0 18 19 20 | + * | 0 0 0 24 25 | + * \ 0 0 0 0 30 / + */ + + const IndexType rows = 6; + const IndexType cols = 5; + + Matrix m( rows, cols ); + + RealType value = 1; + for( IndexType i = 0; i < rows; i++ ) + for( IndexType j = 0; j < cols; j++ ) + { + if( abs( i - j ) <= 1 ) + m.setElement( i, j, value ); + value++; + } + + // Check the added elements + EXPECT_EQ( m.getElement( 0, 0 ), 1 ); + EXPECT_EQ( m.getElement( 0, 1 ), 2 ); + EXPECT_EQ( m.getElement( 0, 2 ), 0 ); + EXPECT_EQ( m.getElement( 0, 3 ), 0 ); + EXPECT_EQ( m.getElement( 0, 4 ), 0 ); + + EXPECT_EQ( m.getElement( 1, 0 ), 6 ); + EXPECT_EQ( m.getElement( 1, 1 ), 7 ); + EXPECT_EQ( m.getElement( 1, 2 ), 8 ); + EXPECT_EQ( m.getElement( 1, 3 ), 0 ); + EXPECT_EQ( m.getElement( 1, 4 ), 0 ); + + EXPECT_EQ( m.getElement( 2, 0 ), 0 ); + EXPECT_EQ( m.getElement( 2, 1 ), 12 ); + EXPECT_EQ( m.getElement( 2, 2 ), 13 ); + EXPECT_EQ( m.getElement( 2, 3 ), 14 ); + EXPECT_EQ( m.getElement( 2, 4 ), 0 ); + + EXPECT_EQ( m.getElement( 3, 0 ), 0 ); + EXPECT_EQ( m.getElement( 3, 1 ), 0 ); + EXPECT_EQ( m.getElement( 3, 2 ), 18 ); + EXPECT_EQ( m.getElement( 3, 3 ), 19 ); + EXPECT_EQ( m.getElement( 3, 4 ), 20 ); + + EXPECT_EQ( m.getElement( 4, 0 ), 0 ); + EXPECT_EQ( m.getElement( 4, 1 ), 0 ); + EXPECT_EQ( m.getElement( 4, 2 ), 0 ); + EXPECT_EQ( m.getElement( 4, 3 ), 24 ); + EXPECT_EQ( m.getElement( 4, 4 ), 25 ); + + EXPECT_EQ( m.getElement( 5, 0 ), 0 ); + EXPECT_EQ( m.getElement( 5, 1 ), 0 ); + EXPECT_EQ( m.getElement( 5, 2 ), 0 ); + EXPECT_EQ( m.getElement( 5, 3 ), 0 ); + EXPECT_EQ( m.getElement( 5, 4 ), 30 ); + + // Add new elements to the old elements with a multiplying factor applied to the old elements. 
+ /* + * The following setup results in the following 6x5 sparse matrix: + * + * / 0 0 0 0 0 0 \ / 1 2 0 0 0 \ / 11 11 0 0 0 \ / 11 11 0 0 0 \ + * | 0 1 0 0 0 0 | | 6 7 8 0 0 | | 22 22 22 0 0 | | 28 29 30 0 0 | + * | 0 0 2 0 0 0 | * | 0 12 13 14 0 | + | 0 33 33 33 0 | = | 0 57 59 61 0 | + * | 0 0 0 3 0 0 | | 0 0 18 19 20 | | 0 0 44 44 44 | | 0 0 98 101 104 | + * | 0 0 0 0 4 0 | | 0 0 0 24 25 | | 0 0 0 55 55 | | 0 0 0 151 155 | + * \ 0 0 0 0 0 5 / \ 0 0 0 0 30 / \ 0 0 0 0 66 / \ 0 0 0 0 216 / + */ + + auto matrix_view = m.getView(); + auto f = [=] __cuda_callable__ ( IndexType rowIdx ) mutable { + RealType values[ 6 ][ 3 ] { + { 11, 11, 0 }, + { 22, 22, 22 }, + { 33, 33, 33 }, + { 44, 44, 44 }, + { 55, 55, 55 }, + { 66, 66, 66 } }; + auto row = matrix_view.getRow( rowIdx ); + for( IndexType i = 0; i < 3; i++ ) + { + RealType& val = row.getValue( i ); + val = rowIdx * val + values[ rowIdx ][ i ]; + } + }; + TNL::Algorithms::ParallelFor< DeviceType >::exec( 0, 6, f ); + + + EXPECT_EQ( m.getElement( 0, 0 ), 11 ); + EXPECT_EQ( m.getElement( 0, 1 ), 11 ); + EXPECT_EQ( m.getElement( 0, 2 ), 0 ); + EXPECT_EQ( m.getElement( 0, 3 ), 0 ); + EXPECT_EQ( m.getElement( 0, 4 ), 0 ); + + EXPECT_EQ( m.getElement( 1, 0 ), 28 ); + EXPECT_EQ( m.getElement( 1, 1 ), 29 ); + EXPECT_EQ( m.getElement( 1, 2 ), 30 ); + EXPECT_EQ( m.getElement( 1, 3 ), 0 ); + EXPECT_EQ( m.getElement( 1, 4 ), 0 ); + + EXPECT_EQ( m.getElement( 2, 0 ), 0 ); + EXPECT_EQ( m.getElement( 2, 1 ), 57 ); + EXPECT_EQ( m.getElement( 2, 2 ), 59 ); + EXPECT_EQ( m.getElement( 2, 3 ), 61 ); + EXPECT_EQ( m.getElement( 2, 4 ), 0 ); + + EXPECT_EQ( m.getElement( 3, 0 ), 0 ); + EXPECT_EQ( m.getElement( 3, 1 ), 0 ); + EXPECT_EQ( m.getElement( 3, 2 ), 98 ); + EXPECT_EQ( m.getElement( 3, 3 ), 101 ); + EXPECT_EQ( m.getElement( 3, 4 ), 104 ); + + EXPECT_EQ( m.getElement( 4, 0 ), 0 ); + EXPECT_EQ( m.getElement( 4, 1 ), 0 ); + EXPECT_EQ( m.getElement( 4, 2 ), 0 ); + EXPECT_EQ( m.getElement( 4, 3 ), 151 ); + EXPECT_EQ( m.getElement( 4, 4 ), 155 ); + + EXPECT_EQ( m.getElement( 5, 0 ), 0 ); + EXPECT_EQ( m.getElement( 5, 1 ), 0 ); + EXPECT_EQ( m.getElement( 5, 2 ), 0 ); + EXPECT_EQ( m.getElement( 5, 3 ), 0 ); + EXPECT_EQ( m.getElement( 5, 4 ), 216 ); +} + +template< typename Matrix > +void test_VectorProduct() +{ + using RealType = typename Matrix::RealType; + using DeviceType = typename Matrix::DeviceType; + using IndexType = typename Matrix::IndexType; + + /* + * Sets up the following 5x4 matrix: + * + * / 1 2 0 0 \ + * | 5 6 7 0 | + * | 0 10 11 12 | + * | 0 0 15 16 | + * \ 0 0 0 20 / + */ + const IndexType rows = 5; + const IndexType cols = 4; + + Matrix m( rows, cols ); + + RealType value = 1; + for( IndexType i = 0; i < rows; i++ ) + for( IndexType j = 0; j < cols; j++) + { + if( abs( i - j ) <= 1 ) + m.setElement( i, j, value ); + value++; + } + + using VectorType = TNL::Containers::Vector< RealType, DeviceType, IndexType >; + + VectorType inVector( 4 ); + inVector = 2; + + VectorType outVector( 5 ); + outVector = 0; + + m.vectorProduct( inVector, outVector); + + EXPECT_EQ( outVector.getElement( 0 ), 6 ); + EXPECT_EQ( outVector.getElement( 1 ), 36 ); + EXPECT_EQ( outVector.getElement( 2 ), 66 ); + EXPECT_EQ( outVector.getElement( 3 ), 62 ); + EXPECT_EQ( outVector.getElement( 4 ), 40 ); +} + +template< typename Matrix1, typename Matrix2 = Matrix1 > +void test_AddMatrix() +{ + using RealType = typename Matrix1::RealType; + using DeviceType = typename Matrix1::DeviceType; + using IndexType = typename Matrix1::IndexType; + + /* + * Sets up the following 5x4 
matrix: + * + * / 1 2 0 0 \ + * | 5 6 7 0 | + * | 0 10 11 12 | + * | 0 0 15 16 | + * \ 0 0 0 20 / + */ + const IndexType rows = 5; + const IndexType cols = 4; + + Matrix1 m( rows, cols ); + + RealType value = 1; + for( IndexType i = 0; i < rows; i++ ) + for( IndexType j = 0; j < cols; j++) + { + if( abs( i - j ) <= 1 ) + m.setElement( i, j, value ); + value++; + } + + /* + * Sets up the following 5x4 matrix: + * + * / 1 2 0 0 \ + * | 3 4 5 0 | + * | 0 6 7 8 | + * | 0 0 9 10 | + * \ 0 0 0 11 / + */ + Matrix2 m2( rows, cols ); + + RealType newValue = 1; + for( IndexType i = 0; i < rows; i++ ) + for( IndexType j = 0; j < cols; j++) + if( abs( i - j ) <= 1 ) + m2.setElement( i, j, newValue++ ); + + /* + * Compute the following 5x4 matrix: + * + * / 1 2 0 0 \ / 1 2 0 0 \ / 3 6 0 0 \ + * | 5 6 7 0 | | 3 4 5 0 | | 11 14 17 0 | + * | 0 10 11 12 | + 2 * | 0 6 7 8 | = | 0 22 25 28 | + * | 0 0 15 16 | | 0 0 9 10 | | 0 0 33 36 | + * \ 0 0 0 20 / \ 0 0 0 11 / \ 0 0 0 42 / + */ + + Matrix1 mResult; + mResult.reset(); + mResult.setDimensions( rows, cols ); + + mResult = m; + + RealType matrixMultiplicator = 2; + RealType thisMatrixMultiplicator = 1; + + mResult.addMatrix( m2, matrixMultiplicator, thisMatrixMultiplicator ); + + EXPECT_EQ( mResult.getElement( 0, 0 ), matrixMultiplicator * m2.getElement( 0, 0 ) + thisMatrixMultiplicator * m.getElement( 0, 0 ) ); + EXPECT_EQ( mResult.getElement( 0, 1 ), matrixMultiplicator * m2.getElement( 0, 1 ) + thisMatrixMultiplicator * m.getElement( 0, 1 ) ); + EXPECT_EQ( mResult.getElement( 0, 2 ), matrixMultiplicator * m2.getElement( 0, 2 ) + thisMatrixMultiplicator * m.getElement( 0, 2 ) ); + EXPECT_EQ( mResult.getElement( 0, 3 ), matrixMultiplicator * m2.getElement( 0, 3 ) + thisMatrixMultiplicator * m.getElement( 0, 3 ) ); + + EXPECT_EQ( mResult.getElement( 1, 0 ), matrixMultiplicator * m2.getElement( 1, 0 ) + thisMatrixMultiplicator * m.getElement( 1, 0 ) ); + EXPECT_EQ( mResult.getElement( 1, 1 ), matrixMultiplicator * m2.getElement( 1, 1 ) + thisMatrixMultiplicator * m.getElement( 1, 1 ) ); + EXPECT_EQ( mResult.getElement( 1, 2 ), matrixMultiplicator * m2.getElement( 1, 2 ) + thisMatrixMultiplicator * m.getElement( 1, 2 ) ); + EXPECT_EQ( mResult.getElement( 1, 3 ), matrixMultiplicator * m2.getElement( 1, 3 ) + thisMatrixMultiplicator * m.getElement( 1, 3 ) ); + + EXPECT_EQ( mResult.getElement( 2, 0 ), matrixMultiplicator * m2.getElement( 2, 0 ) + thisMatrixMultiplicator * m.getElement( 2, 0 ) ); + EXPECT_EQ( mResult.getElement( 2, 1 ), matrixMultiplicator * m2.getElement( 2, 1 ) + thisMatrixMultiplicator * m.getElement( 2, 1 ) ); + EXPECT_EQ( mResult.getElement( 2, 2 ), matrixMultiplicator * m2.getElement( 2, 2 ) + thisMatrixMultiplicator * m.getElement( 2, 2 ) ); + EXPECT_EQ( mResult.getElement( 2, 3 ), matrixMultiplicator * m2.getElement( 2, 3 ) + thisMatrixMultiplicator * m.getElement( 2, 3 ) ); + + EXPECT_EQ( mResult.getElement( 3, 0 ), matrixMultiplicator * m2.getElement( 3, 0 ) + thisMatrixMultiplicator * m.getElement( 3, 0 ) ); + EXPECT_EQ( mResult.getElement( 3, 1 ), matrixMultiplicator * m2.getElement( 3, 1 ) + thisMatrixMultiplicator * m.getElement( 3, 1 ) ); + EXPECT_EQ( mResult.getElement( 3, 2 ), matrixMultiplicator * m2.getElement( 3, 2 ) + thisMatrixMultiplicator * m.getElement( 3, 2 ) ); + EXPECT_EQ( mResult.getElement( 3, 3 ), matrixMultiplicator * m2.getElement( 3, 3 ) + thisMatrixMultiplicator * m.getElement( 3, 3 ) ); + + EXPECT_EQ( mResult.getElement( 4, 0 ), matrixMultiplicator * m2.getElement( 4, 0 ) + thisMatrixMultiplicator * 
m.getElement( 4, 0 ) ); + EXPECT_EQ( mResult.getElement( 4, 1 ), matrixMultiplicator * m2.getElement( 4, 1 ) + thisMatrixMultiplicator * m.getElement( 4, 1 ) ); + EXPECT_EQ( mResult.getElement( 4, 2 ), matrixMultiplicator * m2.getElement( 4, 2 ) + thisMatrixMultiplicator * m.getElement( 4, 2 ) ); + EXPECT_EQ( mResult.getElement( 4, 3 ), matrixMultiplicator * m2.getElement( 4, 3 ) + thisMatrixMultiplicator * m.getElement( 4, 3 ) ); + + EXPECT_EQ( mResult.getElement( 0, 0 ), 3 ); + EXPECT_EQ( mResult.getElement( 0, 1 ), 6 ); + EXPECT_EQ( mResult.getElement( 0, 2 ), 0 ); + EXPECT_EQ( mResult.getElement( 0, 3 ), 0 ); + + EXPECT_EQ( mResult.getElement( 1, 0 ), 11 ); + EXPECT_EQ( mResult.getElement( 1, 1 ), 14 ); + EXPECT_EQ( mResult.getElement( 1, 2 ), 17 ); + EXPECT_EQ( mResult.getElement( 1, 3 ), 0 ); + + EXPECT_EQ( mResult.getElement( 2, 0 ), 0 ); + EXPECT_EQ( mResult.getElement( 2, 1 ), 22 ); + EXPECT_EQ( mResult.getElement( 2, 2 ), 25 ); + EXPECT_EQ( mResult.getElement( 2, 3 ), 28 ); + + EXPECT_EQ( mResult.getElement( 3, 0 ), 0 ); + EXPECT_EQ( mResult.getElement( 3, 1 ), 0 ); + EXPECT_EQ( mResult.getElement( 3, 2 ), 33 ); + EXPECT_EQ( mResult.getElement( 3, 3 ), 36 ); + + EXPECT_EQ( mResult.getElement( 4, 0 ), 0 ); + EXPECT_EQ( mResult.getElement( 4, 1 ), 0 ); + EXPECT_EQ( mResult.getElement( 4, 2 ), 0 ); + EXPECT_EQ( mResult.getElement( 4, 3 ), 42 ); +} + +template< typename Matrix > +void test_GetMatrixProduct() +{ + using RealType = typename Matrix::RealType; + using DeviceType = typename Matrix::DeviceType; + using IndexType = typename Matrix::IndexType; +/* + * Sets up the following 5x4 matrix: + * + * / 1 2 3 4 \ + * | 5 6 7 8 | + * | 9 10 11 12 | + * | 13 14 15 16 | + * \ 17 18 19 20 / + */ + const IndexType leftRows = 5; + const IndexType leftCols = 4; + + Matrix leftMatrix; + leftMatrix.reset(); + leftMatrix.setDimensions( leftRows, leftCols ); + + RealType value = 1; + for( IndexType i = 0; i < leftRows; i++ ) + for( IndexType j = 0; j < leftCols; j++) + leftMatrix.setElement( i, j, value++ ); + +/* + * Sets up the following 4x5 matrix: + * + * / 1 2 3 4 5 \ + * | 6 7 8 9 10 | + * | 11 12 13 14 15 | + * \ 16 17 18 19 20 / + */ + const IndexType rightRows = 4; + const IndexType rightCols = 5; + + Matrix rightMatrix; + rightMatrix.reset(); + rightMatrix.setDimensions( rightRows, rightCols ); + + RealType newValue = 1; + for( IndexType i = 0; i < rightRows; i++ ) + for( IndexType j = 0; j < rightCols; j++) + rightMatrix.setElement( i, j, newValue++ ); + +/* + * Sets up the following 5x5 resulting matrix: + * + * / 0 0 0 0 \ + * | 0 0 0 0 | + * | 0 0 0 0 | + * | 0 0 0 0 | + * \ 0 0 0 0 / + */ + + Matrix mResult; + mResult.reset(); + mResult.setDimensions( leftRows, rightCols ); + mResult.setValue( 0 ); + + RealType leftMatrixMultiplicator = 1; + RealType rightMatrixMultiplicator = 2; +/* + * / 1 2 3 4 \ / 220 240 260 280 300 \ + * | 5 6 7 8 | / 1 2 3 4 5 \ | 492 544 596 648 700 | + * 1 * | 9 10 11 12 | * 2 * | 6 7 8 9 10 | = | 764 848 932 1016 1100 | + * | 13 14 15 16 | | 11 12 13 14 15 | | 1036 1152 1268 1384 1500 | + * \ 17 18 19 20 / \ 16 17 18 19 20 / \ 1308 1456 1604 1752 1900 / + */ + + mResult.getMatrixProduct( leftMatrix, rightMatrix, leftMatrixMultiplicator, rightMatrixMultiplicator ); + + EXPECT_EQ( mResult.getElement( 0, 0 ), 220 ); + EXPECT_EQ( mResult.getElement( 0, 1 ), 240 ); + EXPECT_EQ( mResult.getElement( 0, 2 ), 260 ); + EXPECT_EQ( mResult.getElement( 0, 3 ), 280 ); + EXPECT_EQ( mResult.getElement( 0, 4 ), 300 ); + + EXPECT_EQ( mResult.getElement( 1, 0 ), 492 ); + 
EXPECT_EQ( mResult.getElement( 1, 1 ), 544 ); + EXPECT_EQ( mResult.getElement( 1, 2 ), 596 ); + EXPECT_EQ( mResult.getElement( 1, 3 ), 648 ); + EXPECT_EQ( mResult.getElement( 1, 4 ), 700 ); + + EXPECT_EQ( mResult.getElement( 2, 0 ), 764 ); + EXPECT_EQ( mResult.getElement( 2, 1 ), 848 ); + EXPECT_EQ( mResult.getElement( 2, 2 ), 932 ); + EXPECT_EQ( mResult.getElement( 2, 3 ), 1016 ); + EXPECT_EQ( mResult.getElement( 2, 4 ), 1100 ); + + EXPECT_EQ( mResult.getElement( 3, 0 ), 1036 ); + EXPECT_EQ( mResult.getElement( 3, 1 ), 1152 ); + EXPECT_EQ( mResult.getElement( 3, 2 ), 1268 ); + EXPECT_EQ( mResult.getElement( 3, 3 ), 1384 ); + EXPECT_EQ( mResult.getElement( 3, 4 ), 1500 ); + + EXPECT_EQ( mResult.getElement( 4, 0 ), 1308 ); + EXPECT_EQ( mResult.getElement( 4, 1 ), 1456 ); + EXPECT_EQ( mResult.getElement( 4, 2 ), 1604 ); + EXPECT_EQ( mResult.getElement( 4, 3 ), 1752 ); + EXPECT_EQ( mResult.getElement( 4, 4 ), 1900 ); +} + +template< typename Matrix > +void test_GetTransposition() +{ + using RealType = typename Matrix::RealType; + using DeviceType = typename Matrix::DeviceType; + using IndexType = typename Matrix::IndexType; +/* + * Sets up the following 3x2 matrix: + * + * / 1 2 \ + * | 3 4 | + * \ 5 6 / + */ + const IndexType rows = 3; + const IndexType cols = 2; + + Matrix m; + m.reset(); + m.setDimensions( rows, cols ); + + RealType value = 1; + for( IndexType i = 0; i < rows; i++ ) + for( IndexType j = 0; j < cols; j++ ) + m.setElement( i, j, value++ ); + + m.print( std::cout ); + +/* + * Sets up the following 2x3 matrix: + * + * / 0 0 0 \ + * \ 0 0 0 / + */ + Matrix mTransposed; + mTransposed.reset(); + mTransposed.setDimensions( cols, rows ); + + mTransposed.print( std::cout ); + + RealType matrixMultiplicator = 1; + + mTransposed.getTransposition( m, matrixMultiplicator ); + + mTransposed.print( std::cout ); + +/* + * Should result in the following 2x3 matrix: + * + * / 1 3 5 \ + * \ 2 4 6 / + */ + + EXPECT_EQ( mTransposed.getElement( 0, 0 ), 1 ); + EXPECT_EQ( mTransposed.getElement( 0, 1 ), 3 ); + EXPECT_EQ( mTransposed.getElement( 0, 2 ), 5 ); + + EXPECT_EQ( mTransposed.getElement( 1, 0 ), 2 ); + EXPECT_EQ( mTransposed.getElement( 1, 1 ), 4 ); + EXPECT_EQ( mTransposed.getElement( 1, 2 ), 6 ); +} + + +template< typename Matrix > +void test_PerformSORIteration() +{ + using RealType = typename Matrix::RealType; + using DeviceType = typename Matrix::DeviceType; + using IndexType = typename Matrix::IndexType; +/* + * Sets up the following 4x4 matrix: + * + * / 4 1 1 1 \ + * | 1 4 1 1 | + * | 1 1 4 1 | + * \ 1 1 1 4 / + */ + const IndexType rows = 4; + const IndexType cols = 4; + + Matrix m; + m.reset(); + m.setDimensions( rows, cols ); + + m.setElement( 0, 0, 4.0 ); // 0th row + m.setElement( 0, 1, 1.0 ); + m.setElement( 0, 2, 1.0 ); + m.setElement( 0, 3, 1.0 ); + + m.setElement( 1, 0, 1.0 ); // 1st row + m.setElement( 1, 1, 4.0 ); + m.setElement( 1, 2, 1.0 ); + m.setElement( 1, 3, 1.0 ); + + m.setElement( 2, 0, 1.0 ); + m.setElement( 2, 1, 1.0 ); // 2nd row + m.setElement( 2, 2, 4.0 ); + m.setElement( 2, 3, 1.0 ); + + m.setElement( 3, 0, 1.0 ); // 3rd row + m.setElement( 3, 1, 1.0 ); + m.setElement( 3, 2, 1.0 ); + m.setElement( 3, 3, 4.0 ); + + RealType bVector [ 4 ] = { 1.0, 1.0, 1.0, 1.0 }; + RealType xVector [ 4 ] = { 1.0, 1.0, 1.0, 1.0 }; + + IndexType row = 0; + RealType omega = 1; + + m.performSORIteration( bVector, row++, xVector, omega); + + EXPECT_EQ( xVector[ 0 ], -0.5 ); + EXPECT_EQ( xVector[ 1 ], 1.0 ); + EXPECT_EQ( xVector[ 2 ], 1.0 ); + EXPECT_EQ( xVector[ 3 ], 1.0 ); + + 
m.performSORIteration( bVector, row++, xVector, omega); + + EXPECT_EQ( xVector[ 0 ], -0.5 ); + EXPECT_EQ( xVector[ 1 ], -0.125 ); + EXPECT_EQ( xVector[ 2 ], 1.0 ); + EXPECT_EQ( xVector[ 3 ], 1.0 ); + + m.performSORIteration( bVector, row++, xVector, omega); + + EXPECT_EQ( xVector[ 0 ], -0.5 ); + EXPECT_EQ( xVector[ 1 ], -0.125 ); + EXPECT_EQ( xVector[ 2 ], 0.15625 ); + EXPECT_EQ( xVector[ 3 ], 1.0 ); + + m.performSORIteration( bVector, row++, xVector, omega); + + EXPECT_EQ( xVector[ 0 ], -0.5 ); + EXPECT_EQ( xVector[ 1 ], -0.125 ); + EXPECT_EQ( xVector[ 2 ], 0.15625 ); + EXPECT_EQ( xVector[ 3 ], 0.3671875 ); +} + +template< typename Matrix > +void test_AssignmentOperator() +{ + using RealType = typename Matrix::RealType; + using DeviceType = typename Matrix::DeviceType; + using IndexType = typename Matrix::IndexType; + constexpr bool rowMajorOrder = Matrix::getRowMajorOrder(); + + using TridiagonalHost = TNL::Matrices::Tridiagonal< RealType, TNL::Devices::Host, IndexType, rowMajorOrder >; + using TridiagonalCuda = TNL::Matrices::Tridiagonal< RealType, TNL::Devices::Cuda, IndexType, !rowMajorOrder >; + + const IndexType rows( 10 ), columns( 10 ); + TridiagonalHost hostMatrix( rows, columns ); + for( IndexType i = 0; i < rows; i++ ) + for( IndexType j = 0; j < columns; j++ ) + if( abs( i - j ) <= 1 ) + hostMatrix.setElement( i, j, i + j ); + + Matrix matrix( rows, columns ); + matrix.getValues() = 0.0; + matrix = hostMatrix; + for( IndexType i = 0; i < columns; i++ ) + for( IndexType j = 0; j < rows; j++ ) + if( abs( i - j ) <= 1 ) + EXPECT_EQ( matrix.getElement( i, j ), i + j ); + else + EXPECT_EQ( matrix.getElement( i, j ), 0.0 ); + +#ifdef HAVE_CUDA + TridiagonalCuda cudaMatrix( rows, columns ); + for( IndexType i = 0; i < rows; i++ ) + for( IndexType j = 0; j < columns; j++ ) + if( abs( i - j ) <= 1 ) + cudaMatrix.setElement( i, j, i + j ); + + matrix.getValues() = 0.0; + matrix = cudaMatrix; + for( IndexType i = 0; i < rows; i++ ) + for( IndexType j = 0; j < columns; j++ ) + { + if( abs( i - j ) <= 1 ) + EXPECT_EQ( matrix.getElement( i, j ), i + j ); + else + EXPECT_EQ( matrix.getElement( i, j ), 0.0 ); + } +#endif +} + + +template< typename Matrix > +void test_SaveAndLoad() +{ + using RealType = typename Matrix::RealType; + using DeviceType = typename Matrix::DeviceType; + using IndexType = typename Matrix::IndexType; + + /* + * Sets up the following 4x4 matrix: + * + * / 1 2 0 0 \ + * | 5 6 7 0 | + * | 0 10 11 12 | + * \ 0 0 15 16 / + */ + const IndexType rows = 4; + const IndexType cols = 4; + + Matrix savedMatrix( rows, cols ); + + RealType value = 1; + for( IndexType i = 0; i < rows; i++ ) + for( IndexType j = 0; j < cols; j++ ) + { + if( abs( i - j ) <= 1 ) + savedMatrix.setElement( i, j, value ); + value++; + } + + ASSERT_NO_THROW( savedMatrix.save( TEST_FILE_NAME ) ); + + Matrix loadedMatrix; + + ASSERT_NO_THROW( loadedMatrix.load( TEST_FILE_NAME ) ); + + EXPECT_EQ( savedMatrix.getElement( 0, 0 ), loadedMatrix.getElement( 0, 0 ) ); + EXPECT_EQ( savedMatrix.getElement( 0, 1 ), loadedMatrix.getElement( 0, 1 ) ); + EXPECT_EQ( savedMatrix.getElement( 0, 2 ), loadedMatrix.getElement( 0, 2 ) ); + EXPECT_EQ( savedMatrix.getElement( 0, 3 ), loadedMatrix.getElement( 0, 3 ) ); + + EXPECT_EQ( savedMatrix.getElement( 1, 0 ), loadedMatrix.getElement( 1, 0 ) ); + EXPECT_EQ( savedMatrix.getElement( 1, 1 ), loadedMatrix.getElement( 1, 1 ) ); + EXPECT_EQ( savedMatrix.getElement( 1, 2 ), loadedMatrix.getElement( 1, 2 ) ); + EXPECT_EQ( savedMatrix.getElement( 1, 3 ), loadedMatrix.getElement( 1, 3 
) ); + + EXPECT_EQ( savedMatrix.getElement( 2, 0 ), loadedMatrix.getElement( 2, 0 ) ); + EXPECT_EQ( savedMatrix.getElement( 2, 1 ), loadedMatrix.getElement( 2, 1 ) ); + EXPECT_EQ( savedMatrix.getElement( 2, 2 ), loadedMatrix.getElement( 2, 2 ) ); + EXPECT_EQ( savedMatrix.getElement( 2, 3 ), loadedMatrix.getElement( 2, 3 ) ); + + EXPECT_EQ( savedMatrix.getElement( 3, 0 ), loadedMatrix.getElement( 3, 0 ) ); + EXPECT_EQ( savedMatrix.getElement( 3, 1 ), loadedMatrix.getElement( 3, 1 ) ); + EXPECT_EQ( savedMatrix.getElement( 3, 2 ), loadedMatrix.getElement( 3, 2 ) ); + EXPECT_EQ( savedMatrix.getElement( 3, 3 ), loadedMatrix.getElement( 3, 3 ) ); + + EXPECT_EQ( savedMatrix.getElement( 0, 0 ), 1 ); + EXPECT_EQ( savedMatrix.getElement( 0, 1 ), 2 ); + EXPECT_EQ( savedMatrix.getElement( 0, 2 ), 0 ); + EXPECT_EQ( savedMatrix.getElement( 0, 3 ), 0 ); + + EXPECT_EQ( savedMatrix.getElement( 1, 0 ), 5 ); + EXPECT_EQ( savedMatrix.getElement( 1, 1 ), 6 ); + EXPECT_EQ( savedMatrix.getElement( 1, 2 ), 7 ); + EXPECT_EQ( savedMatrix.getElement( 1, 3 ), 0 ); + + EXPECT_EQ( savedMatrix.getElement( 2, 0 ), 0 ); + EXPECT_EQ( savedMatrix.getElement( 2, 1 ), 10 ); + EXPECT_EQ( savedMatrix.getElement( 2, 2 ), 11 ); + EXPECT_EQ( savedMatrix.getElement( 2, 3 ), 12 ); + + EXPECT_EQ( savedMatrix.getElement( 3, 0 ), 0 ); + EXPECT_EQ( savedMatrix.getElement( 3, 1 ), 0 ); + EXPECT_EQ( savedMatrix.getElement( 3, 2 ), 15 ); + EXPECT_EQ( savedMatrix.getElement( 3, 3 ), 16 ); +} + +template< typename Matrix > +void test_Print() +{ + using RealType = typename Matrix::RealType; + using DeviceType = typename Matrix::DeviceType; + using IndexType = typename Matrix::IndexType; + + /* + * Sets up the following 5x4 sparse matrix: + * + * / 1 2 0 0 \ + * | 5 6 7 0 | + * | 0 10 11 12 | + * | 0 0 15 16 | + * \ 0 0 0 20 / + */ + const IndexType rows = 5; + const IndexType cols = 4; + + Matrix m( rows, cols ); + + RealType value = 1; + for( IndexType i = 0; i < rows; i++) + for( IndexType j = 0; j < cols; j++) + { + if( abs( i - j ) <= 1 ) + m.setElement( i, j, value ); + value++; + } + + std::stringstream printed; + std::stringstream couted; + + //change the underlying buffer and save the old buffer + auto old_buf = std::cout.rdbuf(printed.rdbuf()); + + m.print( std::cout ); //all the std::cout goes to ss + + std::cout.rdbuf(old_buf); //reset + couted << "Row: 0 -> Col:0->1\t Col:1->2\t\n" + "Row: 1 -> Col:0->5\t Col:1->6\t Col:2->7\t\n" + "Row: 2 -> Col:1->10\t Col:2->11\t Col:3->12\t\n" + "Row: 3 -> Col:2->15\t Col:3->16\t\n" + "Row: 4 -> Col:3->20\t\n"; + + EXPECT_EQ( printed.str(), couted.str() ); +} + +// test fixture for typed tests +template< typename Matrix > +class MatrixTest : public ::testing::Test +{ +protected: + using MatrixType = Matrix; +}; + +// types for which MatrixTest is instantiated +using MatrixTypes = ::testing::Types +< + TNL::Matrices::Tridiagonal< int, TNL::Devices::Host, short >, + TNL::Matrices::Tridiagonal< long, TNL::Devices::Host, short >, + TNL::Matrices::Tridiagonal< float, TNL::Devices::Host, short >, + TNL::Matrices::Tridiagonal< double, TNL::Devices::Host, short >, + TNL::Matrices::Tridiagonal< int, TNL::Devices::Host, int >, + TNL::Matrices::Tridiagonal< long, TNL::Devices::Host, int >, + TNL::Matrices::Tridiagonal< float, TNL::Devices::Host, int >, + TNL::Matrices::Tridiagonal< double, TNL::Devices::Host, int >, + TNL::Matrices::Tridiagonal< int, TNL::Devices::Host, long >, + TNL::Matrices::Tridiagonal< long, TNL::Devices::Host, long >, + TNL::Matrices::Tridiagonal< float, TNL::Devices::Host, long >, 
+ TNL::Matrices::Tridiagonal< double, TNL::Devices::Host, long > +#ifdef HAVE_CUDA + ,TNL::Matrices::Tridiagonal< int, TNL::Devices::Cuda, short >, + TNL::Matrices::Tridiagonal< long, TNL::Devices::Cuda, short >, + TNL::Matrices::Tridiagonal< float, TNL::Devices::Cuda, short >, + TNL::Matrices::Tridiagonal< double, TNL::Devices::Cuda, short >, + TNL::Matrices::Tridiagonal< int, TNL::Devices::Cuda, int >, + TNL::Matrices::Tridiagonal< long, TNL::Devices::Cuda, int >, + TNL::Matrices::Tridiagonal< float, TNL::Devices::Cuda, int >, + TNL::Matrices::Tridiagonal< double, TNL::Devices::Cuda, int >, + TNL::Matrices::Tridiagonal< int, TNL::Devices::Cuda, long >, + TNL::Matrices::Tridiagonal< long, TNL::Devices::Cuda, long >, + TNL::Matrices::Tridiagonal< float, TNL::Devices::Cuda, long >, + TNL::Matrices::Tridiagonal< double, TNL::Devices::Cuda, long > +#endif +>; + +TYPED_TEST_SUITE( MatrixTest, MatrixTypes ); + +TYPED_TEST( MatrixTest, getSerializationType ) +{ + test_GetSerializationType(); +} + +TYPED_TEST( MatrixTest, setDimensionsTest ) +{ + using MatrixType = typename TestFixture::MatrixType; + + test_SetDimensions< MatrixType >(); +} + +TYPED_TEST( MatrixTest, setLikeTest ) +{ + using MatrixType = typename TestFixture::MatrixType; + + test_SetLike< MatrixType, MatrixType >(); +} + +TYPED_TEST( MatrixTest, getCompressedRowLengthTest ) +{ + using MatrixType = typename TestFixture::MatrixType; + + test_GetCompressedRowLengths< MatrixType >(); +} + +TYPED_TEST( MatrixTest, getRowLengthTest ) +{ + using MatrixType = typename TestFixture::MatrixType; + + test_GetRowLength< MatrixType >(); +} + +TYPED_TEST( MatrixTest, getAllocatedElementsCountTest ) +{ + using MatrixType = typename TestFixture::MatrixType; + + test_GetAllocatedElementsCount< MatrixType >(); +} + +TYPED_TEST( MatrixTest, getNumberOfNonzeroMatrixElementsTest ) +{ + using MatrixType = typename TestFixture::MatrixType; + + test_GetNumberOfNonzeroMatrixElements< MatrixType >(); +} + +TYPED_TEST( MatrixTest, resetTest ) +{ + using MatrixType = typename TestFixture::MatrixType; + + test_Reset< MatrixType >(); +} + +TYPED_TEST( MatrixTest, setValueTest ) +{ + using MatrixType = typename TestFixture::MatrixType; + + test_SetValue< MatrixType >(); +} + +TYPED_TEST( MatrixTest, setElementTest ) +{ + using MatrixType = typename TestFixture::MatrixType; + + test_SetElement< MatrixType >(); +} + +TYPED_TEST( MatrixTest, addElementTest ) +{ + using MatrixType = typename TestFixture::MatrixType; + + test_AddElement< MatrixType >(); +} + +TYPED_TEST( MatrixTest, setRowTest ) +{ + using MatrixType = typename TestFixture::MatrixType; + + test_SetRow< MatrixType >(); +} + +TYPED_TEST( MatrixTest, addRowTest ) +{ + using MatrixType = typename TestFixture::MatrixType; + + test_AddRow< MatrixType >(); +} + +TYPED_TEST( MatrixTest, vectorProductTest ) +{ + using MatrixType = typename TestFixture::MatrixType; + + test_VectorProduct< MatrixType >(); +} + +TYPED_TEST( MatrixTest, addMatrixTest ) +{ + using MatrixType = typename TestFixture::MatrixType; + + test_AddMatrix< MatrixType >(); +} + +TYPED_TEST( MatrixTest, addMatrixTest_differentOrdering ) +{ + using MatrixType = typename TestFixture::MatrixType; + + using RealType = typename MatrixType::RealType; + using DeviceType = typename MatrixType::DeviceType; + using IndexType = typename MatrixType::IndexType; + using RealAllocatorType = typename MatrixType::RealAllocatorType; + using MatrixType2 = TNL::Matrices::Tridiagonal< RealType, DeviceType, IndexType, ! 
MatrixType::getRowMajorOrder(), RealAllocatorType >; + + test_AddMatrix< MatrixType, MatrixType2 >(); +} + +TYPED_TEST( MatrixTest, assignmentOperatorTest ) +{ + using MatrixType = typename TestFixture::MatrixType; + + test_AssignmentOperator< MatrixType >(); +} + +TYPED_TEST( MatrixTest, saveAndLoadTest ) +{ + using MatrixType = typename TestFixture::MatrixType; + + test_SaveAndLoad< MatrixType >(); +} + +TYPED_TEST( MatrixTest, printTest ) +{ + using MatrixType = typename TestFixture::MatrixType; + + test_Print< MatrixType >(); +} + +//// test_getType is not general enough yet. DO NOT TEST IT YET. + +//TEST( TridiagonalMatrixTest, Tridiagonal_GetTypeTest_Host ) +//{ +// host_test_GetType< Tridiagonal_host_float, Tridiagonal_host_int >(); +//} +// +//#ifdef HAVE_CUDA +//TEST( TridiagonalMatrixTest, Tridiagonal_GetTypeTest_Cuda ) +//{ +// cuda_test_GetType< Tridiagonal_cuda_float, Tridiagonal_cuda_int >(); +//} +//#endif + +/*TEST( TridiagonalMatrixTest, Tridiagonal_getMatrixProductTest_Host ) +{ + bool testRan = false; + EXPECT_TRUE( testRan ); + std::cout << "\nTEST DID NOT RUN. NOT WORKING.\n\n"; + std::cout << "If launched on CPU, this test will not build, but will print the following message: \n"; + std::cout << " /home/lukas/tnl-dev/src/TNL/Matrices/Tridiagonal_impl.h(609): error: no instance of function template \"TNL::Matrices::TridiagonalMatrixProductKernel\" matches the argument list\n"; + std::cout << " argument types are: (TNL::Matrices::Tridiagonal<int, TNL::Devices::Host, int> *, Tridiagonal_host_int *, Tridiagonal_host_int *, const int, const int, int, int)\n"; + std::cout << " detected during:\n"; + std::cout << " instantiation of \"void TNL::Matrices::Tridiagonal<Real, Device, Index>::getMatrixProduct(const Matrix1 &, const Matrix2 &, const TNL::Matrices::Tridiagonal<Real, Device, Index>::RealType &, const TNL::Matrices::Tridiagonal<Real, Device, Index>::RealType &) [with Real=int, Device=TNL::Devices::Host, Index=int, Matrix1=Tridiagonal_host_int, Matrix2=Tridiagonal_host_int, tileDim=32]\"\n"; + std::cout << " /home/lukas/tnl-dev/src/UnitTests/Matrices/TridiagonalMatrixTest.h(901): here\n"; + std::cout << " instantiation of \"void test_GetMatrixProduct<Matrix>() [with Matrix=Tridiagonal_host_int]\"\n"; + std::cout << " /home/lukas/tnl-dev/src/UnitTests/Matrices/TridiagonalMatrixTest.h(1315): here\n\n"; +} + +#ifdef HAVE_CUDA +TEST( TridiagonalMatrixTest, Tridiagonal_getMatrixProductTest_Cuda ) +{ + bool testRan = false; + EXPECT_TRUE( testRan ); + std::cout << "\nTEST DID NOT RUN. 
NOT WORKING.\n\n"; + std::cout << "If launched on GPU, this test will not build, but will print the following message: \n"; + std::cout << " /home/lukas/tnl-dev/src/TNL/Matrices/Tridiagonal_impl.h(510): error: identifier \"tnlCudaMin\" is undefined\n"; + std::cout << " detected during:\n"; + std::cout << " instantiation of \"void TNL::Matrices::TridiagonalMatrixProductKernel<Real,Index,Matrix1,Matrix2,tileDim,tileRowBlockSize>(TNL::Matrices::Tridiagonal<Real, TNL::Devices::Cuda, Index> *, const Matrix1 *, const Matrix2 *, Real, Real, Index, Index) [with Real=int, Index=int, Matrix1=Tridiagonal_cuda_int, Matrix2=Tridiagonal_cuda_int, tileDim=32, tileRowBlockSize=8]\"\n"; + std::cout << " instantiation of \"void TNL::Matrices::Tridiagonal<Real, Device, Index>::getMatrixProduct(const Matrix1 &, const Matrix2 &, const TNL::Matrices::Tridiagonal<Real, Device, Index>::RealType &, const TNL::Matrices::Tridiagonal<Real, Device, Index>::RealType &) [with Real=int, Device=TNL::Devices::Cuda, Index=int, Matrix1=Tridiagonal_cuda_int, Matrix2=Tridiagonal_cuda_int, tileDim=32]\"\n"; + std::cout << " /home/lukas/tnl-dev/src/UnitTests/Matrices/TridiagonalMatrixTest.h(901): here\n"; + std::cout << " instantiation of \"void test_GetMatrixProduct<Matrix>() [with Matrix=Tridiagonal_cuda_int]\"\n"; + std::cout << " /home/lukas/tnl-dev/src/UnitTests/Matrices/TridiagonalMatrixTest.h(1332): here\n\n"; +} +#endif + +TEST( TridiagonalMatrixTest, Tridiagonal_getTranspositionTest_Host ) +{ +// test_GetTransposition< Tridiagonal_host_int >(); + bool testRan = false; + EXPECT_TRUE( testRan ); + std::cout << "\nTEST DID NOT RUN. NOT WORKING.\n\n"; + std::cout << "If launched on CPU, this test will not build, but will print the following message: \n"; + std::cout << " /home/lukas/tnl-dev/src/TNL/Matrices/Tridiagonal_impl.h(836): error: no instance of function template \"TNL::Matrices::TridiagonalTranspositionAlignedKernel\" matches the argument list\n"; + std::cout << " argument types are: (TNL::Matrices::Tridiagonal<int, TNL::Devices::Host, int> *, Tridiagonal_host_int *, const int, int, int)\n"; + std::cout << " detected during:\n"; + std::cout << " instantiation of \"void TNL::Matrices::Tridiagonal<Real, Device, Index>::getTransposition(const Matrix &, const TNL::Matrices::Tridiagonal<Real, Device, Index>::RealType &) [with Real=int, Device=TNL::Devices::Host, Index=int, Matrix=Tridiagonal_host_int, tileDim=32]\"\n"; + std::cout << " /home/lukas/tnl-dev/src/UnitTests/Matrices/TridiagonalMatrixTest.h(977): here\n"; + std::cout << " instantiation of \"void test_GetTransposition<Matrix>() [with Matrix=Tridiagonal_host_int]\"\n"; + std::cout << " /home/lukas/tnl-dev/src/UnitTests/Matrices/TridiagonalMatrixTest.h(1420): here\n\n"; + std::cout << "AND this message: \n"; + std::cout << " /home/lukas/tnl-dev/src/TNL/Matrices/Tridiagonal_impl.h(852): error: no instance of function template \"TNL::Matrices::TridiagonalTranspositionNonAlignedKernel\" matches the argument list\n"; + std::cout << " argument types are: (TNL::Matrices::Tridiagonal<int, TNL::Devices::Host, int> *, Tridiagonal_host_int *, const int, int, int)\n"; + std::cout << " detected during:\n"; + std::cout << " instantiation of \"void TNL::Matrices::Tridiagonal<Real, Device, Index>::getTransposition(const Matrix &, const TNL::Matrices::Tridiagonal<Real, Device, Index>::RealType &) [with Real=int, Device=TNL::Devices::Host, Index=int, Matrix=Tridiagonal_host_int, tileDim=32]\"\n"; + std::cout << " 
/home/lukas/tnl-dev/src/UnitTests/Matrices/TridiagonalMatrixTest.h(977): here\n"; + std::cout << " instantiation of \"void test_GetTransposition<Matrix>() [with Matrix=Tridiagonal_host_int]\"\n"; + std::cout << " /home/lukas/tnl-dev/src/UnitTests/Matrices/TridiagonalMatrixTest.h(1420): here\n\n"; +} + +#ifdef HAVE_CUDA +TEST( TridiagonalMatrixTest, Tridiagonal_getTranspositionTest_Cuda ) +{ +// test_GetTransposition< Tridiagonal_cuda_int >(); + bool testRan = false; + EXPECT_TRUE( testRan ); + std::cout << "\nTEST DID NOT RUN. NOT WORKING.\n\n"; + std::cout << "If launched on GPU, this test throws the following message: \n"; + std::cout << " Assertion 'row >= 0 && row < this->getRows() && column >= 0 && column < this->getColumns()' failed !!!\n"; + std::cout << " File: /home/lukas/tnl-dev/src/TNL/Matrices/Tridiagonal_impl.h \n"; + std::cout << " Line: 329 \n"; + std::cout << " Diagnostics: Not supported with CUDA.\n"; + std::cout << " Assertion 'row >= 0 && row < this->getRows() && column >= 0 && column < this->getColumns()' failed !!! \n"; + std::cout << " File: /home/lukas/tnl-dev/src/TNL/Matrices/Tridiagonal_impl.h \n"; + std::cout << " Line: 329 \n"; + std::cout << " Diagnostics: Not supported with CUDA.\n"; + std::cout << " Assertion 'row >= 0 && row < this->getRows() && column >= 0 && column < this->getColumns()' failed !!! \n"; + std::cout << " File: /home/lukas/tnl-dev/src/TNL/Matrices/Tridiagonal_impl.h \n"; + std::cout << " Line: 329 \n"; + std::cout << " Diagnostics: Not supported with CUDA.\n"; + std::cout << " Assertion 'row >= 0 && row < this->getRows() && column >= 0 && column < this->getColumns()' failed !!! \n"; + std::cout << " File: /home/lukas/tnl-dev/src/TNL/Matrices/Tridiagonal_impl.h \n"; + std::cout << " Line: 329 \n"; + std::cout << " Diagnostics: Not supported with CUDA.\n"; + std::cout << " terminate called after throwing an instance of 'TNL::Exceptions::CudaRuntimeError'\n"; + std::cout << " what(): CUDA ERROR 4 (cudaErrorLaunchFailure): unspecified launch failure.\n"; + std::cout << " Source: line 57 in /home/lukas/tnl-dev/src/TNL/Containers/Algorithms/ArrayOperationsCuda_impl.h: unspecified launch failure\n"; + std::cout << " [1] 4003 abort (core dumped) ./TridiagonalMatrixTest-dbg\n"; +} +#endif + +TEST( TridiagonalMatrixTest, Tridiagonal_performSORIterationTest_Host ) +{ + test_PerformSORIteration< Tridiagonal_host_float >(); +} + +#ifdef HAVE_CUDA +TEST( TridiagonalMatrixTest, Tridiagonal_performSORIterationTest_Cuda ) +{ +// test_PerformSORIteration< Tridiagonal_cuda_float >(); + bool testRan = false; + EXPECT_TRUE( testRan ); + std::cout << "\nTEST DID NOT RUN. NOT WORKING.\n\n"; + std::cout << "If launched, this test throws the following message: \n"; + std::cout << " [1] 6992 segmentation fault (core dumped) ./SparseMatrixTest-dbg\n\n"; + std::cout << "\n THIS IS NOT IMPLEMENTED FOR CUDA YET!!\n\n"; +} +#endif + * */ + +#endif // HAVE_GTEST + +#include "../main.h"
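
The expected values in test_PerformSORIteration (-0.5, -0.125, 0.15625, 0.3671875) follow from one in-order SOR sweep over the dense 4x4 system spelled out in the test's comment, with omega = 1 (plain Gauss-Seidel). A minimal standalone sketch of that arithmetic, assuming only the matrix and vectors written in the test and not using any TNL types, is:

// Standalone sketch, not part of the patch above: reproduces the arithmetic
// behind the EXPECT_EQ values in test_PerformSORIteration.
#include <cstdio>

int main()
{
   const double A[ 4 ][ 4 ] = { { 4, 1, 1, 1 },
                                { 1, 4, 1, 1 },
                                { 1, 1, 4, 1 },
                                { 1, 1, 1, 4 } };
   const double b[ 4 ] = { 1, 1, 1, 1 };
   double x[ 4 ] = { 1, 1, 1, 1 };
   const double omega = 1.0;

   // One SOR update per row, in the same order as the test calls
   // performSORIteration with row = 0, 1, 2, 3.
   for( int row = 0; row < 4; row++ ) {
      double sigma = 0.0;
      for( int j = 0; j < 4; j++ )
         if( j != row )
            sigma += A[ row ][ j ] * x[ j ];
      x[ row ] = ( 1.0 - omega ) * x[ row ] + omega * ( b[ row ] - sigma ) / A[ row ][ row ];
      std::printf( "after row %d: %g %g %g %g\n", row, x[ 0 ], x[ 1 ], x[ 2 ], x[ 3 ] );
   }
   // Prints -0.5, -0.125, 0.15625 and 0.3671875 in turn, matching the test.
   return 0;
}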
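
The add* tests above share one arithmetic pattern: a multiplicator scales one operand and the other is added to it (addElement scales the stored element, addMatrix scales each operand by its own multiplicator, and the test_AddRow lambda uses rowIdx as the factor). A small standalone sketch of the three spot checks, assuming only the values written in the tests' comments and again using no TNL types, is:

// Standalone sketch, not part of the patch above: spells out the scaling
// conventions the addElement / addRow / addMatrix tests rely on.
#include <cassert>

int main()
{
   // addElement( i, j, newValue, multiplicator ):
   // element_new = multiplicator * element_old + newValue, e.g. position (5,4).
   double element = 30.0;
   element = 2.0 * element + 14.0;
   assert( element == 74.0 );      // EXPECT_EQ( m.getElement( 5, 4 ), 74 )

   // test_AddRow lambda: val = rowIdx * val + values[ rowIdx ][ i ], row 5.
   double val = 30.0;
   val = 5.0 * val + 66.0;
   assert( val == 216.0 );         // EXPECT_EQ( m.getElement( 5, 4 ), 216 )

   // addMatrix( m2, matrixMultiplicator, thisMatrixMultiplicator ):
   // result = matrixMultiplicator * m2 + thisMatrixMultiplicator * m, e.g. (1,2).
   const double result12 = 2.0 * 5.0 + 1.0 * 7.0;
   assert( result12 == 17.0 );     // EXPECT_EQ( mResult.getElement( 1, 2 ), 17 )
   return 0;
}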