diff --git a/src/TNL/Algorithms/AtomicOperations.h b/src/TNL/Algorithms/AtomicOperations.h
index 456839b5ea2ba69fcb333c814a6d6ff3af4f5d11..6da10e2acab1c1429f854053ce06361bda540726 100644
--- a/src/TNL/Algorithms/AtomicOperations.h
+++ b/src/TNL/Algorithms/AtomicOperations.h
@@ -8,19 +8,13 @@
 
 #pragma once
 
-#ifdef HAVE_CUDA
-   #include <cuda.h>
-#endif
-#include <TNL/Devices/Sequential.h>
-#include <TNL/Devices/Host.h>
-#include <TNL/Devices/Cuda.h>
+#include <TNL/Atomic.h>
 
 namespace TNL {
 namespace Algorithms {
 
 template< typename Device >
-struct AtomicOperations
-{};
+struct AtomicOperations;
 
 template<>
 struct AtomicOperations< Devices::Host >
@@ -31,11 +25,16 @@ struct AtomicOperations< Devices::Host >
 {
    TNL_NVCC_HD_WARNING_DISABLE
    template< typename Value >
    __cuda_callable__
-   static void
+   static Value
    add( Value& v, const Value& a )
    {
-      #pragma omp atomic update
-      v += a;
+      Value old;
+      #pragma omp atomic capture
+      {
+         old = v;
+         v += a;
+      }
+      return old;
    }
 };
@@ -48,10 +47,12 @@ struct AtomicOperations< Devices::Sequential >
 {
    TNL_NVCC_HD_WARNING_DISABLE
    template< typename Value >
    __cuda_callable__
-   static void
+   static Value
    add( Value& v, const Value& a )
    {
+      const Value old = v;
       v += a;
+      return old;
    }
 };
@@ -60,56 +61,26 @@ struct AtomicOperations< Devices::Cuda >
 {
    template< typename Value >
    __cuda_callable__
-   static void
+   static Value
    add( Value& v, const Value& a )
    {
 #ifdef HAVE_CUDA
-      atomicAdd( &v, a );
-#endif // HAVE_CUDA
-   }
-
-#ifdef HAVE_CUDA
-   __device__
-   static void
-   add( double& v, const double& a )
-   {
-   #if __CUDA_ARCH__ < 600
-      unsigned long long int* v_as_ull = (unsigned long long int*) &v;
-      unsigned long long int old = *v_as_ull, assumed;
-
-      do {
-         assumed = old;
-         old = atomicCAS( v_as_ull, assumed, __double_as_longlong( a + __longlong_as_double( assumed ) ) );
-
-         // Note: uses integer comparison to avoid hang in case of NaN (since NaN != NaN)
-      } while( assumed != old );
-   #else // __CUDA_ARCH__ < 600
-      atomicAdd( &v, a );
-   #endif //__CUDA_ARCH__ < 600
-   }
-#else // HAVE_CUDA
-   static void
-   add( double& v, const double& a )
-   {}
-#endif // HAVE_CUDA
-
-   __cuda_callable__
-   static void
-   add( long int& v, const long int& a )
-   {
-#ifdef HAVE_CUDA
-      TNL_ASSERT_TRUE( false, "Atomic add for long int is not supported on CUDA." );
-#endif // HAVE_CUDA
+      return atomicAdd( &v, a );
+#else
+      return 0;
+#endif
    }
 
    __cuda_callable__
-   static void
+   static short int
    add( short int& v, const short int& a )
    {
 #ifdef HAVE_CUDA
      TNL_ASSERT_TRUE( false, "Atomic add for short int is not supported on CUDA." );
-#endif // HAVE_CUDA
+#endif
+      return 0;
   }
 };
+
 } // namespace Algorithms
 } // namespace TNL
diff --git a/src/TNL/Atomic.h b/src/TNL/Atomic.h
index 94ac5dfc8cf4993f35a03f81a5d3e1ccbcd3c344..2f457d74c81f30fcccf8bef5fbe4e4c4a50869ea 100644
--- a/src/TNL/Atomic.h
+++ b/src/TNL/Atomic.h
@@ -14,11 +14,12 @@
 #include <TNL/Devices/Sequential.h>
 #include <TNL/Devices/Cuda.h>
 
-// double-precision atomicAdd function for Maxwell and older GPUs
-// copied from: https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#atomic-functions
 #ifdef HAVE_CUDA
-   #if __CUDA_ARCH__ < 600
 namespace {
+
+// double-precision atomicAdd function for Maxwell and older GPUs
+// copied from: https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#atomic-functions
+   #if defined( __CUDA_ARCH__ ) && __CUDA_ARCH__ < 600
 __device__
 double
 atomicAdd( double* address, double val )
@@ -35,8 +36,28 @@ atomicAdd( double* address, double val )
    return __longlong_as_double( old );
 }
 
-} // namespace
    #endif
+
+__device__
+long int
+atomicAdd( long int* address, long int val )
+{
+   unsigned long long int* address_as_unsigned = reinterpret_cast< unsigned long long int* >( address );
+   long int old = *address;
+   long int assumed;
+
+   do {
+      assumed = old;
+      long int sum = val + assumed;
+      old = atomicCAS( address_as_unsigned,
+                       *reinterpret_cast< unsigned long long int* >( &assumed ),
+                       *reinterpret_cast< unsigned long long int* >( &sum ) );
+   } while( assumed != old );
+
+   return old;
+}
+
+} // namespace
 #endif
 
 namespace TNL {
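Note (not part of the patch): the change above turns AtomicOperations< Device >::add into a fetch-and-add that returns the value held before the update; the transposition code further below relies on that return value to hand out unique write slots. A minimal standalone sketch of the same capture-based OpenMP pattern on the host — the fetchAndAdd name and the toy program are illustrative, not TNL API:

#include <iostream>
#include <vector>

// Illustrative stand-in for the Host specialization of AtomicOperations::add:
// atomically adds `a` to `v` and returns the value `v` held before the update.
template< typename Value >
Value
fetchAndAdd( Value& v, const Value& a )
{
   Value old;
   #pragma omp atomic capture
   {
      old = v;
      v += a;
   }
   return old;
}

int
main()
{
   std::vector< int > slots( 8, -1 );
   int counter = 0;

   // each iteration reserves a unique slot index, regardless of thread interleaving
   #pragma omp parallel for
   for( int i = 0; i < 8; i++ ) {
      const int slot = fetchAndAdd( counter, 1 );
      slots[ slot ] = i;
   }

   for( int s : slots )
      std::cout << s << " ";
   std::cout << std::endl;  // prints a permutation of 0..7
}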
diff --git a/src/TNL/Matrices/Sandbox/SparseSandboxMatrix.h b/src/TNL/Matrices/Sandbox/SparseSandboxMatrix.h
index 27a9dfaa83fb06c2539269e8f21b1f61fa5585ca..2e4d55643cc7995cc1fcb7a9e2f9445d286b9deb 100644
--- a/src/TNL/Matrices/Sandbox/SparseSandboxMatrix.h
+++ b/src/TNL/Matrices/Sandbox/SparseSandboxMatrix.h
@@ -1054,11 +1054,12 @@ public:
    void addMatrix( const SparseMatrix< Real2, Segments, Device, Index2 >& matrix,
                    const RealType& matrixMultiplicator = 1.0,
                    const RealType& thisMatrixMultiplicator = 1.0 );
+   */
 
    template< typename Real2, typename Index2 >
-   void getTransposition( const SparseMatrix< Real2, Segments, Device, Index2 >& matrix,
-                          const RealType& matrixMultiplicator = 1.0 );
-   */
+   void
+   getTransposition( const SparseSandboxMatrix< Real2, Device, Index2, MatrixType >& matrix,
+                     const RealType& matrixMultiplicator = 1.0 );
 
    template< typename Vector1, typename Vector2 >
    bool
diff --git a/src/TNL/Matrices/Sandbox/SparseSandboxMatrix.hpp b/src/TNL/Matrices/Sandbox/SparseSandboxMatrix.hpp
index abb963b984984877a25d6b29ed4f53203fe5f794..f84f79f920953136d6b0cb96f2841e1176195536 100644
--- a/src/TNL/Matrices/Sandbox/SparseSandboxMatrix.hpp
+++ b/src/TNL/Matrices/Sandbox/SparseSandboxMatrix.hpp
@@ -112,7 +112,7 @@ SparseSandboxMatrix< Real, Device, Index, MatrixType, RealAllocator, IndexAlloca
                          this->getColumns(),
                          this->getValues().getConstView(),
                          this->columnIndexes.getConstView(),
-                         this->segments.getConstView() );
+                         const_cast< SparseSandboxMatrix* >( this )->rowPointers.getView() );
 }
 
 template< typename Real, typename Device, typename Index, typename MatrixType, typename RealAllocator, typename IndexAllocator >
@@ -479,7 +479,7 @@ template< typename Function >
 void
 SparseSandboxMatrix< Real, Device, Index, MatrixType, RealAllocator, IndexAllocator >::forAllRows( Function&& function ) const
 {
-   this->getConsView().forAllRows( function );
+   this->getConstView().forAllRows( function );
 }
 
 template< typename Real, typename Device, typename Index, typename MatrixType, typename RealAllocator, typename IndexAllocator >
@@ -521,7 +521,8 @@ SparseSandboxMatrix< Real, Device, Index, MatrixType, RealAllocator, IndexAlloca
    this->sequentialForRows( 0, this->getRows(), function );
 }
 
-/*template< typename Real,
+/*
+template< typename Real,
           template< typename, typename, typename > class Segments,
           typename Device,
           typename Index,
@@ -532,23 +533,67 @@ IndexAllocator2 > void SparseSandboxMatrix< Real, Device, Index, MatrixType, Rea
           SparseSandboxMatrix< Real2, Segments2, Device, Index2, RealAllocator2, IndexAllocator2 >& matrix,
           const RealType& matrixMultiplicator,
           const RealType& thisMatrixMultiplicator )
 {
-
 }
+*/
 
-template< typename Real,
-          template< typename, typename, typename > class Segments,
-          typename Device,
-          typename Index,
-          typename RealAllocator,
-          typename IndexAllocator >
+template< typename Real, typename Device, typename Index, typename MatrixType, typename RealAllocator, typename IndexAllocator >
 template< typename Real2, typename Index2 >
 void
-SparseSandboxMatrix< Real, Device, Index, MatrixType, RealAllocator, IndexAllocator >::
-getTransposition( const SparseSandboxMatrix< Real2, Device, Index2 >& matrix,
-                  const RealType& matrixMultiplicator )
+SparseSandboxMatrix< Real, Device, Index, MatrixType, RealAllocator, IndexAllocator >::getTransposition(
+   const SparseSandboxMatrix< Real2, Device, Index2, MatrixType >& matrix,
+   const RealType& matrixMultiplicator )
 {
+   // set transposed dimensions
+   setDimensions( matrix.getColumns(), matrix.getRows() );
+
+   // stage 1: compute row capacities for the transposition
+   RowsCapacitiesType capacities;
+   capacities.resize( this->getRows(), 0 );
+   auto capacities_view = capacities.getView();
+   using MatrixRowView = typename SparseSandboxMatrix< Real2, Device, Index2, MatrixType >::ConstRowView;
+   matrix.forAllRows(
+      [ = ] __cuda_callable__( const MatrixRowView& row ) mutable
+      {
+         for( IndexType c = 0; c < row.getSize(); c++ ) {
+            // row index of the transpose = column index of the input
+            const IndexType& transRowIdx = row.getColumnIndex( c );
+            if( transRowIdx < 0 )
+               continue;
+            // increment the capacity for the row in the transpose
+            Algorithms::AtomicOperations< DeviceType >::add( capacities_view[ row.getColumnIndex( c ) ], IndexType( 1 ) );
+         }
+      } );
+
+   // set the row capacities
+   setRowCapacities( capacities );
+   capacities.reset();
+
+   // index of the first unwritten element per row
+   RowsCapacitiesType offsets;
+   offsets.resize( this->getRows(), 0 );
+   auto offsets_view = offsets.getView();
-}*/
+   // stage 2: copy and transpose the data
+   auto trans_view = getView();
+   matrix.forAllRows(
+      [ = ] __cuda_callable__( const MatrixRowView& row ) mutable
+      {
+         // row index of the input = column index of the transpose
+         const IndexType& rowIdx = row.getRowIndex();
+         for( IndexType c = 0; c < row.getSize(); c++ ) {
+            // row index of the transpose = column index of the input
+            const IndexType& transRowIdx = row.getColumnIndex( c );
+            if( transRowIdx < 0 )
+               continue;
+            // local index in the row of the transpose
+            const IndexType transLocalIdx =
+               Algorithms::AtomicOperations< DeviceType >::add( offsets_view[ transRowIdx ], IndexType( 1 ) );
+            // get the row in the transposed matrix and set the value
+            auto transRow = trans_view.getRow( transRowIdx );
+            transRow.setElement( transLocalIdx, rowIdx, row.getValue( c ) * matrixMultiplicator );
+         }
+      } );
+}
 
 template< typename Real, typename Device, typename Index, typename MatrixType, typename RealAllocator, typename IndexAllocator >
 template< typename Vector1, typename Vector2 >
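For orientation (not part of the patch): the new getTransposition is a two-stage scheme — first count how many elements fall into each row of the transpose and use the counts as row capacities, then scatter every element to its transposed position, reserving a slot in the target row with an atomic fetch-and-add. A standalone sketch of the same idea on raw CSR arrays, serial for clarity; the Csr struct and transpose function are illustrative, not TNL API:

#include <vector>

// Plain CSR storage: rowPointers has size rows + 1, the other arrays have size nnz.
struct Csr
{
   int rows, cols;
   std::vector< int > rowPointers;
   std::vector< int > columnIndexes;
   std::vector< double > values;
};

Csr
transpose( const Csr& a )
{
   Csr t;
   t.rows = a.cols;
   t.cols = a.rows;
   t.rowPointers.assign( t.rows + 1, 0 );

   // stage 1: count the elements in each column of `a` = each row of the transpose
   for( int c : a.columnIndexes )
      t.rowPointers[ c + 1 ]++;
   for( int r = 0; r < t.rows; r++ )
      t.rowPointers[ r + 1 ] += t.rowPointers[ r ];

   const int nnz = a.rowPointers[ a.rows ];
   t.columnIndexes.resize( nnz );
   t.values.resize( nnz );

   // stage 2: scatter; offsets[r] plays the role of the atomic per-row write cursor
   std::vector< int > offsets( t.rows, 0 );
   for( int r = 0; r < a.rows; r++ )
      for( int k = a.rowPointers[ r ]; k < a.rowPointers[ r + 1 ]; k++ ) {
         const int transRow = a.columnIndexes[ k ];
         const int slot = t.rowPointers[ transRow ] + offsets[ transRow ]++;
         t.columnIndexes[ slot ] = r;
         t.values[ slot ] = a.values[ k ];
      }
   return t;
}

In the TNL code above the same counting and scattering is expressed through row views and setRowCapacities, and the cursor increments go through AtomicOperations so that rows can be processed concurrently.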
diff --git a/src/TNL/Matrices/Sandbox/SparseSandboxMatrixView.h b/src/TNL/Matrices/Sandbox/SparseSandboxMatrixView.h
index 58e4b77dc87c8bf078bb3180afe1ea494b17bff8..45840f487bb14b50bc8be715e1012722ad98be52 100644
--- a/src/TNL/Matrices/Sandbox/SparseSandboxMatrixView.h
+++ b/src/TNL/Matrices/Sandbox/SparseSandboxMatrixView.h
@@ -83,7 +83,7 @@ public:
    /**
    * \brief The type of matrix elements.
    */
-   using RealType = Real;
+   using RealType = std::remove_const_t< Real >;
 
   // using ComputeRealType = ComputeReal;
 
diff --git a/src/TNL/Matrices/Sandbox/SparseSandboxMatrixView.hpp b/src/TNL/Matrices/Sandbox/SparseSandboxMatrixView.hpp
index a80f766d4ea9078828cc36cadbf2cfc4a49fce6e..506793a264ac004acc7f1611132345f8d9c9a889 100644
--- a/src/TNL/Matrices/Sandbox/SparseSandboxMatrixView.hpp
+++ b/src/TNL/Matrices/Sandbox/SparseSandboxMatrixView.hpp
@@ -793,7 +793,7 @@ template< typename Real, typename Device, typename Index, typename MatrixType >
 void
 SparseSandboxMatrixView< Real, Device, Index, MatrixType >::save( File& file ) const
 {
-   MatrixView< RealType, DeviceType, IndexType >::save( file );
+   MatrixView< Real, Device, Index >::save( file );
    file << this->columnIndexes << this->rowPointers;  // SANDBOX_TODO: Replace this with medata required by your format
 }
 
diff --git a/src/TNL/Matrices/SparseMatrix.h b/src/TNL/Matrices/SparseMatrix.h
index 76aee6dfb4498c782f9d99c6281e9c264749b5b8..b47ef9c853d991774ac384e2f5cf2f2fb25466e8 100644
--- a/src/TNL/Matrices/SparseMatrix.h
+++ b/src/TNL/Matrices/SparseMatrix.h
@@ -1088,12 +1088,13 @@ public:
    void addMatrix( const SparseMatrix< Real2, Segments, Device, Index2 >& matrix,
                    const RealType& matrixMultiplicator = 1.0,
                    const RealType& thisMatrixMultiplicator = 1.0 );
-
-   template< typename Real2, typename Index2 >
-   void getTransposition( const SparseMatrix< Real2, Segments, Device, Index2 >& matrix,
-                          const RealType& matrixMultiplicator = 1.0 );
    */
 
+   template< typename Real2, typename Index2, template< typename, typename, typename > class Segments2 >
+   void
+   getTransposition( const SparseMatrix< Real2, Device, Index2, MatrixType, Segments2 >& matrix,
+                     const ComputeRealType& matrixMultiplicator = 1.0 );
+
    /**
    * \brief Copy-assignment of exactly the same matrix type.
    *
diff --git a/src/TNL/Matrices/SparseMatrix.hpp b/src/TNL/Matrices/SparseMatrix.hpp
index f53e8c3cbe11e850f43365afcd1d5d5c11868757..a2d497f1e51cb96c9a9bfb4bb49bbbe631ecb8f3 100644
--- a/src/TNL/Matrices/SparseMatrix.hpp
+++ b/src/TNL/Matrices/SparseMatrix.hpp
@@ -795,7 +795,7 @@ void
 SparseMatrix< Real, Device, Index, MatrixType, Segments, ComputeReal, RealAllocator, IndexAllocator >::forAllRows( Function&& function ) const
 {
-   this->getConsView().forAllRows( function );
+   this->getConstView().forAllRows( function );
 }
 
 template< typename Real,
@@ -870,7 +870,8 @@ SparseMatrix< Real, Device, Index, MatrixType, Segments, ComputeReal, RealAlloca
    this->sequentialForRows( (IndexType) 0, this->getRows(), function );
 }
 
-/*template< typename Real,
+/*
+template< typename Real,
           template< typename, typename, typename > class Segments,
          typename Device,
          typename Index,
@@ -882,23 +883,75 @@ addMatrix( const SparseMatrix< Real2, Segments2, Device, Index2, RealAllocator2,
           const RealType& matrixMultiplicator,
           const RealType& thisMatrixMultiplicator )
 {
-
 }
+*/
 
 template< typename Real,
-          template< typename, typename, typename > class Segments,
           typename Device,
           typename Index,
+          typename MatrixType,
+          template< typename, typename, typename >
+          class Segments,
+          typename ComputeReal,
           typename RealAllocator,
           typename IndexAllocator >
-template< typename Real2, typename Index2 >
+template< typename Real2, typename Index2, template< typename, typename, typename > class Segments2 >
 void
-SparseMatrix< Real, Device, Index, MatrixType, Segments, ComputeReal, RealAllocator, IndexAllocator >::
-getTransposition( const SparseMatrix< Real2, Device, Index2 >& matrix,
-                  const RealType& matrixMultiplicator )
+SparseMatrix< Real, Device, Index, MatrixType, Segments, ComputeReal, RealAllocator, IndexAllocator >::getTransposition(
+   const SparseMatrix< Real2, Device, Index2, MatrixType, Segments2 >& matrix,
+   const ComputeRealType& matrixMultiplicator )
 {
-
+   // set transposed dimensions
+   setDimensions( matrix.getColumns(), matrix.getRows() );
+
+   // stage 1: compute row capacities for the transposition
+   RowsCapacitiesType capacities;
+   capacities.resize( this->getRows(), 0 );
+   auto capacities_view = capacities.getView();
+   using MatrixRowView = typename SparseMatrix< Real2, Device, Index2, MatrixType, Segments2 >::ConstRowView;
+   matrix.forAllRows(
+      [ = ] __cuda_callable__( const MatrixRowView& row ) mutable
+      {
+         for( IndexType c = 0; c < row.getSize(); c++ ) {
+            // row index of the transpose = column index of the input
+            const IndexType& transRowIdx = row.getColumnIndex( c );
+            if( transRowIdx == row.getPaddingIndex() )
+               continue;
+            // increment the capacity for the row in the transpose
+            Algorithms::AtomicOperations< DeviceType >::add( capacities_view[ transRowIdx ], IndexType( 1 ) );
+         }
+      } );
+
+   // set the row capacities
+   setRowCapacities( capacities );
+   capacities.reset();
+
+   // index of the first unwritten element per row
+   RowsCapacitiesType offsets;
+   offsets.resize( this->getRows(), 0 );
+   auto offsets_view = offsets.getView();
-}*/
+
+   // stage 2: copy and transpose the data
+   auto trans_view = getView();
+   matrix.forAllRows(
+      [ = ] __cuda_callable__( const MatrixRowView& row ) mutable
+      {
+         // row index of the input = column index of the transpose
+         const IndexType& rowIdx = row.getRowIndex();
+         for( IndexType c = 0; c < row.getSize(); c++ ) {
+            // row index of the transpose = column index of the input
+            const IndexType& transRowIdx = row.getColumnIndex( c );
+            if( transRowIdx == row.getPaddingIndex() )
+               continue;
+            // local index in the row of the transpose
+            const IndexType transLocalIdx =
+               Algorithms::AtomicOperations< DeviceType >::add( offsets_view[ transRowIdx ], IndexType( 1 ) );
+            // get the row in the transposed matrix and set the value
+            auto transRow = trans_view.getRow( transRowIdx );
+            transRow.setElement( transLocalIdx, rowIdx, row.getValue( c ) * matrixMultiplicator );
+         }
+      } );
+}
 
 // copy assignment
 template< typename Real,
diff --git a/src/TNL/Matrices/SparseMatrixView.hpp b/src/TNL/Matrices/SparseMatrixView.hpp
index 9420363b82be57eee4995bffa783ce02fac09114..9ee552fbd6e88be04f9247148690707a78f22f1c 100644
--- a/src/TNL/Matrices/SparseMatrixView.hpp
+++ b/src/TNL/Matrices/SparseMatrixView.hpp
@@ -820,35 +820,6 @@ SparseMatrixView< Real, Device, Index, MatrixType, SegmentsView, ComputeReal >::
    this->sequentialForRows( (IndexType) 0, this->getRows(), function );
 }
 
-/*template< typename Real,
-          template< typename, typename > class SegmentsView,
-          typename Device,
-          typename Index,
-          typename RealAllocator,
-          typename IndexAllocator >
-template< typename Real2, template< typename, typename > class Segments2, typename Index2, typename RealAllocator2, typename
-IndexAllocator2 > void SparseMatrixView< Real, Device, Index, MatrixType, SegmentsView, ComputeReal >:: addMatrix( const
-SparseMatrixView< Real2, Segments2, Device, Index2, RealAllocator2, IndexAllocator2 >& matrix, const RealType&
-matrixMultiplicator, const RealType& thisMatrixMultiplicator )
-{
-
-}
-
-template< typename Real,
-          template< typename, typename > class SegmentsView,
-          typename Device,
-          typename Index,
-          typename RealAllocator,
-          typename IndexAllocator >
-template< typename Real2, typename Index2 >
-void
-SparseMatrixView< Real, Device, Index, MatrixType, SegmentsView, ComputeReal >::
-getTransposition( const SparseMatrixView< Real2, Device, Index2 >& matrix,
-                  const RealType& matrixMultiplicator )
-{
-
-}*/
-
 template< typename Real,
           typename Device,
           typename Index,
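A possible usage sketch for the re-enabled SparseMatrix::getTransposition (illustrative only, not part of the patch; it sticks to calls that appear in this patch and in the unit test below, plus stream output):

#include <iostream>
#include <TNL/Devices/Host.h>
#include <TNL/Matrices/SparseMatrix.h>

int
main()
{
   using MatrixType = TNL::Matrices::SparseMatrix< double, TNL::Devices::Host, int >;

   // 2x3 input matrix:
   //  / 1 0 2 \
   //  \ 0 3 0 /
   MatrixType A( 2, 3 );
   MatrixType::RowsCapacitiesType capacities( 2, 2 );  // at most 2 nonzeros per row
   A.setRowCapacities( capacities );
   A.setElement( 0, 0, 1.0 );
   A.setElement( 0, 2, 2.0 );
   A.setElement( 1, 1, 3.0 );

   // B becomes the 3x2 transpose of A; an optional scaling factor may be passed as well
   MatrixType B;
   B.getTransposition( A );

   std::cout << B << std::endl;
}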
diff --git a/src/TNL/Solvers/Linear/Hypre.h b/src/TNL/Solvers/Linear/Hypre.h
index a5d24aeecf89eae75eb08cb002ecb42d99f2924a..a300595822ccadfad29741a4be7a794c89441ca0 100644
--- a/src/TNL/Solvers/Linear/Hypre.h
+++ b/src/TNL/Solvers/Linear/Hypre.h
@@ -80,7 +80,8 @@ public:
    * will not be updated for the new matrix when calling
    * the \ref solve method.
    */
-   virtual void setMatrix( const Matrices::HypreParCSRMatrix& op, bool reuse_setup = false )
+   virtual void
+   setMatrix( const Matrices::HypreParCSRMatrix& op, bool reuse_setup = false )
   {
      A = &op;
      if( setup_called && reuse_setup )
diff --git a/src/TNL/Solvers/Linear/Hypre.hpp b/src/TNL/Solvers/Linear/Hypre.hpp
index 16226a469e6cb1ffb797571b94f470c1ff887965..37089d4d798e8187622447812189ad2b2e668fe6 100644
--- a/src/TNL/Solvers/Linear/Hypre.hpp
+++ b/src/TNL/Solvers/Linear/Hypre.hpp
@@ -248,7 +248,7 @@ HypreFlexGMRES::setMatrix( const Matrices::HypreParCSRMatrix& op, bool reuse_set
 {
    HypreSolver::setMatrix( op, reuse_setup );
    if( precond != nullptr )
-      precond->setMatrix( *A, reuse_setup );
+      precond->setMatrix( *A, reuse_setup );
 }
 
 void
@@ -286,7 +286,7 @@ HypreParaSails::HypreParaSails( MPI_Comm comm )
 HypreParaSails::HypreParaSails( const Matrices::HypreParCSRMatrix& A )
 : HypreSolver( A )
 {
-   HYPRE_ParaSailsCreate( A.getCommunicator() , &solver );
+   HYPRE_ParaSailsCreate( A.getCommunicator(), &solver );
 }
 
 HypreParaSails::~HypreParaSails()
diff --git a/src/UnitTests/Algorithms/distributedScanTest.h b/src/UnitTests/Algorithms/distributedScanTest.h
index 1ba5277d4bbbeef1b0e741e981a0a3dd270f18db..765e2427c44370c8f69380dc748ac6b0468d7e90 100644
--- a/src/UnitTests/Algorithms/distributedScanTest.h
+++ b/src/UnitTests/Algorithms/distributedScanTest.h
@@ -17,6 +17,12 @@ using namespace TNL::Algorithms;
 using namespace TNL::Algorithms::detail;
 using namespace TNL::MPI;
 
+// this is a workaround for an nvcc 11.7 bug: it drops the scope of enum class members in template function calls
+#ifdef HAVE_CUDA
+static constexpr auto Inclusive = TNL::Algorithms::detail::ScanType::Inclusive;
+static constexpr auto Exclusive = TNL::Algorithms::detail::ScanType::Exclusive;
+#endif
+
 /*
  * Light check of DistributedArray.
 *
diff --git a/src/UnitTests/Algorithms/scanTest.h b/src/UnitTests/Algorithms/scanTest.h
index 9611d7acd66c9b02c86bda2d4f91747a2f9f5687..fe794ef53ff75ac82fbd1d032bbbd278decfa072 100644
--- a/src/UnitTests/Algorithms/scanTest.h
+++ b/src/UnitTests/Algorithms/scanTest.h
@@ -15,6 +15,12 @@ using namespace TNL::Arithmetics;
 using namespace TNL::Algorithms;
 using namespace TNL::Algorithms::detail;
 
+// this is a workaround for an nvcc 11.7 bug: it drops the scope of enum class members in template function calls
+#ifdef HAVE_CUDA
+static constexpr auto Inclusive = TNL::Algorithms::detail::ScanType::Inclusive;
+static constexpr auto Exclusive = TNL::Algorithms::detail::ScanType::Exclusive;
+#endif
+
 // test fixture for typed tests
 template< typename Array >
 class ScanTest : public ::testing::Test
diff --git a/src/UnitTests/Matrices/SparseMatrixTest.h b/src/UnitTests/Matrices/SparseMatrixTest.h
index 2281d229709f53214d14c420dd92ff67d093801c..c4086e634807b842b8aa67d23e7fac303854a958 100644
--- a/src/UnitTests/Matrices/SparseMatrixTest.h
+++ b/src/UnitTests/Matrices/SparseMatrixTest.h
@@ -99,10 +99,17 @@ TYPED_TEST( MatrixTest, reduceRows )
    test_reduceRows< MatrixType >();
 }
 
-TYPED_TEST( MatrixTest, saveAndLoadTest )
+TYPED_TEST( MatrixTest, saveAndLoad )
 {
    using MatrixType = typename TestFixture::MatrixType;
   test_SaveAndLoad< MatrixType >( saveAndLoadFileName );
 }
 
+TYPED_TEST( MatrixTest, getTransposition )
+{
+   using MatrixType = typename TestFixture::MatrixType;
+
+   test_getTransposition< MatrixType >();
+}
+
 #endif
diff --git a/src/UnitTests/Matrices/SparseMatrixTest.hpp b/src/UnitTests/Matrices/SparseMatrixTest.hpp
index 0e7884859d63a07c6fd97cd41354fbda590dde79..6a74e06be6eb7eb5dae680ed8db2a99f7cc6840b 100644
--- a/src/UnitTests/Matrices/SparseMatrixTest.hpp
+++ b/src/UnitTests/Matrices/SparseMatrixTest.hpp
@@ -1356,4 +1356,68 @@ void test_SaveAndLoad( const char* filename )
    EXPECT_EQ( std::remove( filename ), 0 );
 }
 
+template< typename Matrix >
+void test_getTransposition()
+{
+   using RealType = typename Matrix::RealType;
+   using IndexType = typename Matrix::IndexType;
+
+   /*
+    * Sets up the following 5x4 sparse matrix:
+    *
+    *    /  1  2  3  0 \
+    *    |  0  4  0  5 |
+    *    |  6  7  8  0 |
+    *    |  0  9 10 11 |
+    *    \ 12  0  0  0 /
+    */
+
+   const IndexType m_rows = 5;
+   const IndexType m_cols = 4;
+
+   Matrix matrix( m_rows, m_cols );
+   typename Matrix::RowsCapacitiesType capacities( m_rows, 4 );
+   matrix.setRowCapacities( capacities );
+
+   RealType value = 1;
+   for( IndexType i = 0; i < m_cols - 1; i++ )  // 0th row
+      matrix.setElement( 0, i, value++ );
+
+   matrix.setElement( 1, 1, value++ );  // 1st row
+   matrix.setElement( 1, 3, value++ );
+
+   for( IndexType i = 0; i < m_cols - 1; i++ )  // 2nd row
+      matrix.setElement( 2, i, value++ );
+
+   for( IndexType i = 1; i < m_cols; i++ )  // 3rd row
+      matrix.setElement( 3, i, value++ );
+
+   matrix.setElement( 4, 0, value++ );  // 4th row
+
+   Matrix transposed;
+   transposed.getTransposition( matrix );
+
+   EXPECT_EQ( transposed.getElement( 0, 0 ), 1 );
+   EXPECT_EQ( transposed.getElement( 1, 0 ), 2 );
+   EXPECT_EQ( transposed.getElement( 2, 0 ), 3 );
+   EXPECT_EQ( transposed.getElement( 3, 0 ), 0 );
+
+   EXPECT_EQ( transposed.getElement( 0, 1 ), 0 );
+   EXPECT_EQ( transposed.getElement( 1, 1 ), 4 );
+   EXPECT_EQ( transposed.getElement( 2, 1 ), 0 );
+   EXPECT_EQ( transposed.getElement( 3, 1 ), 5 );
+
+   EXPECT_EQ( transposed.getElement( 0, 2 ), 6 );
+   EXPECT_EQ( transposed.getElement( 1, 2 ), 7 );
+   EXPECT_EQ( transposed.getElement( 2, 2 ), 8 );
+   EXPECT_EQ( transposed.getElement( 3, 2 ), 0 );
+
+   EXPECT_EQ( transposed.getElement( 0, 3 ), 0 );
+   EXPECT_EQ( transposed.getElement( 1, 3 ), 9 );
+   EXPECT_EQ( transposed.getElement( 2, 3 ), 10 );
+   EXPECT_EQ( transposed.getElement( 3, 3 ), 11 );
+
+   EXPECT_EQ( transposed.getElement( 0, 4 ), 12 );
+}
+
 #endif
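A further check one could place next to test_getTransposition (a sketch, not part of the patch): transposing twice should reproduce the original matrix, and it only needs the interface already exercised above (gtest is already included in that test header):

// Hypothetical companion test: round-tripping through getTransposition
// must give back the original matrix element by element.
template< typename Matrix >
void test_getTranspositionTwice()
{
   using IndexType = typename Matrix::IndexType;

   // 3x2 matrix with one element per row
   Matrix matrix( 3, 2 );
   typename Matrix::RowsCapacitiesType capacities( 3, 2 );
   matrix.setRowCapacities( capacities );
   matrix.setElement( 0, 1, 1 );
   matrix.setElement( 1, 0, 2 );
   matrix.setElement( 2, 1, 3 );

   Matrix transposed;
   transposed.getTransposition( matrix );
   Matrix roundTrip;
   roundTrip.getTransposition( transposed );

   ASSERT_EQ( roundTrip.getRows(), matrix.getRows() );
   ASSERT_EQ( roundTrip.getColumns(), matrix.getColumns() );
   for( IndexType i = 0; i < matrix.getRows(); i++ )
      for( IndexType j = 0; j < matrix.getColumns(); j++ )
         EXPECT_EQ( roundTrip.getElement( i, j ), matrix.getElement( i, j ) );
}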