Loading src/Benchmarks/LinearSolvers/tnl-benchmark-linear-solvers.h +4 −2 Original line number Diff line number Diff line Loading @@ -361,9 +361,11 @@ struct LinearSolversBenchmark getTrivialOrdering( *matrixPointer, perm, iperm ); SharedPointer< MatrixType > matrix_perm; VectorType x0_perm, b_perm; x0_perm.setLike( x0 ); b_perm.setLike( b ); Matrices::reorderSparseMatrix( *matrixPointer, *matrix_perm, perm, iperm ); Matrices::reorderVector( x0, x0_perm, perm ); Matrices::reorderVector( b, b_perm, perm ); Matrices::reorderArray( x0, x0_perm, perm ); Matrices::reorderArray( b, b_perm, perm ); if( CommunicatorType::isDistributed() ) runDistributed( benchmark, metadata, parameters, matrix_perm, x0_perm, b_perm ); else Loading src/TNL/Matrices/SparseOperations.h +4 −4 Original line number Diff line number Diff line Loading @@ -35,15 +35,15 @@ copyAdjacencyStructure( const Matrix& A, AdjacencyMatrix& B, // permutation to the columns of the matrix, i.e. A_perm = P*A*P^{-1}, where // P is the permutation matrix represented by the perm vector and P^{-1} is the // inverse permutation represented by the iperm vector. template< typename Matrix1, typename Matrix2, typename PermutationVector > template< typename Matrix1, typename Matrix2, typename PermutationArray > void reorderSparseMatrix( const Matrix1& A, Matrix2& A_perm, const PermutationVector& perm, const PermutationVector& iperm ); const PermutationArray& perm, const PermutationArray& iperm ); // TODO: the method does not belong here, but there is no better place... template< typename Vector, typename PermutationVector > template< typename Array1, typename Array2, typename PermutationArray > void reorderVector( const Vector& src, Vector& dest, const PermutationVector& perm ); reorderArray( const Array1& src, Array2& dest, const PermutationArray& perm ); } // namespace Matrices } // namespace TNL Loading src/TNL/Matrices/SparseOperations_impl.h +18 −14 Original line number Diff line number Diff line Loading @@ -263,14 +263,14 @@ copyAdjacencyStructure( const Matrix& A, AdjacencyMatrix& B, } template< typename Matrix1, typename Matrix2, typename PermutationVector > template< typename Matrix1, typename Matrix2, typename PermutationArray > void reorderSparseMatrix( const Matrix1& matrix1, Matrix2& matrix2, const PermutationVector& perm, const PermutationVector& iperm ) reorderSparseMatrix( const Matrix1& matrix1, Matrix2& matrix2, const PermutationArray& perm, const PermutationArray& iperm ) { // TODO: implement on GPU static_assert( std::is_same< typename Matrix1::DeviceType, Devices::Host >::value, "matrix reordering is implemented only for host" ); static_assert( std::is_same< typename Matrix2::DeviceType, Devices::Host >::value, "matrix reordering is implemented only for host" ); static_assert( std::is_same< typename PermutationVector::DeviceType, Devices::Host >::value, "matrix reordering is implemented only for host" ); static_assert( std::is_same< typename PermutationArray::DeviceType, Devices::Host >::value, "matrix reordering is implemented only for host" ); using IndexType = typename Matrix1::IndexType; Loading Loading @@ -328,27 +328,31 @@ reorderSparseMatrix( const Matrix1& matrix1, Matrix2& matrix2, const Permutation } } template< typename Vector, typename PermutationVector > template< typename Array1, typename Array2, typename PermutationArray > void reorderVector( const Vector& src, Vector& dest, const PermutationVector& perm ) reorderArray( const Array1& src, Array2& dest, const PermutationArray& perm ) { static_assert( std::is_same< typename Array1::DeviceType, typename Array2::DeviceType >::value, "Arrays must reside on the same device." ); static_assert( std::is_same< typename Array1::DeviceType, typename PermutationArray::DeviceType >::value, "Arrays must reside on the same device." ); TNL_ASSERT_EQ( src.getSize(), perm.getSize(), "Source vector and permutation must have the same size." ); using RealType = typename Vector::RealType; using DeviceType = typename Vector::DeviceType; using IndexType = typename Vector::IndexType; "Source array and permutation must have the same size." ); TNL_ASSERT_EQ( dest.getSize(), perm.getSize(), "Destination array and permutation must have the same size." ); using DeviceType = typename Array1::DeviceType; using IndexType = typename Array1::IndexType; auto kernel = [] __cuda_callable__ ( IndexType i, const RealType* src, RealType* dest, const typename PermutationVector::RealType* perm ) const typename Array1::ValueType* src, typename Array2::ValueType* dest, const typename PermutationArray::ValueType* perm ) { dest[ i ] = src[ perm[ i ] ]; }; dest.setLike( src ); ParallelFor< DeviceType >::exec( (IndexType) 0, src.getSize(), kernel, src.getData(), Loading Loading
src/Benchmarks/LinearSolvers/tnl-benchmark-linear-solvers.h +4 −2 Original line number Diff line number Diff line Loading @@ -361,9 +361,11 @@ struct LinearSolversBenchmark getTrivialOrdering( *matrixPointer, perm, iperm ); SharedPointer< MatrixType > matrix_perm; VectorType x0_perm, b_perm; x0_perm.setLike( x0 ); b_perm.setLike( b ); Matrices::reorderSparseMatrix( *matrixPointer, *matrix_perm, perm, iperm ); Matrices::reorderVector( x0, x0_perm, perm ); Matrices::reorderVector( b, b_perm, perm ); Matrices::reorderArray( x0, x0_perm, perm ); Matrices::reorderArray( b, b_perm, perm ); if( CommunicatorType::isDistributed() ) runDistributed( benchmark, metadata, parameters, matrix_perm, x0_perm, b_perm ); else Loading
src/TNL/Matrices/SparseOperations.h +4 −4 Original line number Diff line number Diff line Loading @@ -35,15 +35,15 @@ copyAdjacencyStructure( const Matrix& A, AdjacencyMatrix& B, // permutation to the columns of the matrix, i.e. A_perm = P*A*P^{-1}, where // P is the permutation matrix represented by the perm vector and P^{-1} is the // inverse permutation represented by the iperm vector. template< typename Matrix1, typename Matrix2, typename PermutationVector > template< typename Matrix1, typename Matrix2, typename PermutationArray > void reorderSparseMatrix( const Matrix1& A, Matrix2& A_perm, const PermutationVector& perm, const PermutationVector& iperm ); const PermutationArray& perm, const PermutationArray& iperm ); // TODO: the method does not belong here, but there is no better place... template< typename Vector, typename PermutationVector > template< typename Array1, typename Array2, typename PermutationArray > void reorderVector( const Vector& src, Vector& dest, const PermutationVector& perm ); reorderArray( const Array1& src, Array2& dest, const PermutationArray& perm ); } // namespace Matrices } // namespace TNL Loading
src/TNL/Matrices/SparseOperations_impl.h +18 −14 Original line number Diff line number Diff line Loading @@ -263,14 +263,14 @@ copyAdjacencyStructure( const Matrix& A, AdjacencyMatrix& B, } template< typename Matrix1, typename Matrix2, typename PermutationVector > template< typename Matrix1, typename Matrix2, typename PermutationArray > void reorderSparseMatrix( const Matrix1& matrix1, Matrix2& matrix2, const PermutationVector& perm, const PermutationVector& iperm ) reorderSparseMatrix( const Matrix1& matrix1, Matrix2& matrix2, const PermutationArray& perm, const PermutationArray& iperm ) { // TODO: implement on GPU static_assert( std::is_same< typename Matrix1::DeviceType, Devices::Host >::value, "matrix reordering is implemented only for host" ); static_assert( std::is_same< typename Matrix2::DeviceType, Devices::Host >::value, "matrix reordering is implemented only for host" ); static_assert( std::is_same< typename PermutationVector::DeviceType, Devices::Host >::value, "matrix reordering is implemented only for host" ); static_assert( std::is_same< typename PermutationArray::DeviceType, Devices::Host >::value, "matrix reordering is implemented only for host" ); using IndexType = typename Matrix1::IndexType; Loading Loading @@ -328,27 +328,31 @@ reorderSparseMatrix( const Matrix1& matrix1, Matrix2& matrix2, const Permutation } } template< typename Vector, typename PermutationVector > template< typename Array1, typename Array2, typename PermutationArray > void reorderVector( const Vector& src, Vector& dest, const PermutationVector& perm ) reorderArray( const Array1& src, Array2& dest, const PermutationArray& perm ) { static_assert( std::is_same< typename Array1::DeviceType, typename Array2::DeviceType >::value, "Arrays must reside on the same device." ); static_assert( std::is_same< typename Array1::DeviceType, typename PermutationArray::DeviceType >::value, "Arrays must reside on the same device." ); TNL_ASSERT_EQ( src.getSize(), perm.getSize(), "Source vector and permutation must have the same size." ); using RealType = typename Vector::RealType; using DeviceType = typename Vector::DeviceType; using IndexType = typename Vector::IndexType; "Source array and permutation must have the same size." ); TNL_ASSERT_EQ( dest.getSize(), perm.getSize(), "Destination array and permutation must have the same size." ); using DeviceType = typename Array1::DeviceType; using IndexType = typename Array1::IndexType; auto kernel = [] __cuda_callable__ ( IndexType i, const RealType* src, RealType* dest, const typename PermutationVector::RealType* perm ) const typename Array1::ValueType* src, typename Array2::ValueType* dest, const typename PermutationArray::ValueType* perm ) { dest[ i ] = src[ perm[ i ] ]; }; dest.setLike( src ); ParallelFor< DeviceType >::exec( (IndexType) 0, src.getSize(), kernel, src.getData(), Loading