Loading src/Benchmarks/CMakeLists.txt +1 −0 Original line number Original line Diff line number Diff line add_subdirectory( HeatEquation ) add_subdirectory( HeatEquation ) add_subdirectory( BLAS ) add_subdirectory( BLAS ) add_subdirectory( SpMV ) add_subdirectory( SpMV ) add_subdirectory( DistSpMV ) add_subdirectory( LinearSolvers ) add_subdirectory( LinearSolvers ) set( headers set( headers Loading src/Benchmarks/DistSpMV/CMakeLists.txt 0 → 100644 +11 −0 Original line number Original line Diff line number Diff line if( BUILD_CUDA ) cuda_add_executable( tnl-benchmark-distributed-spmv-cuda tnl-benchmark-distributed-spmv.cu ) target_link_libraries( tnl-benchmark-distributed-spmv-cuda tnl ) install( TARGETS tnl-benchmark-distributed-spmv-cuda RUNTIME DESTINATION bin ) endif() add_executable( tnl-benchmark-distributed-spmv tnl-benchmark-distributed-spmv.cpp ) target_link_libraries( tnl-benchmark-distributed-spmv tnl ) install( TARGETS tnl-benchmark-distributed-spmv RUNTIME DESTINATION bin ) src/Benchmarks/DistSpMV/ordering.h 0 → 100644 +133 −0 Original line number Original line Diff line number Diff line #pragma once #include <algorithm> #include <TNL/Devices/Host.h> #include <TNL/ParallelFor.h> using namespace TNL; template< typename Matrix, typename PermutationVector > void getTrivialOrdering( const Matrix& matrix, PermutationVector& perm, PermutationVector& iperm ) { using IndexType = typename Matrix::IndexType; // allocate permutation vectors perm.setSize( matrix.getRows() ); iperm.setSize( matrix.getRows() ); const IndexType N = matrix.getRows() / 2; for( IndexType i = 0; i < N; i++ ) { perm[ 2 * i ] = i; perm[ 2 * i + 1 ] = i + N; iperm[ i ] = 2 * i; iperm[ i + N ] = 2 * i + 1; } } template< typename Vector, typename PermutationVector > void reorderVector( const Vector& src, Vector& dest, const PermutationVector& perm ) { TNL_ASSERT_EQ( src.getSize(), perm.getSize(), "Source vector and permutation must have the same size." ); using RealType = typename Vector::RealType; using DeviceType = typename Vector::DeviceType; using IndexType = typename Vector::IndexType; auto kernel = [] __cuda_callable__ ( IndexType i, const RealType* src, RealType* dest, const typename PermutationVector::RealType* perm ) { dest[ i ] = src[ perm[ i ] ]; }; dest.setLike( src ); ParallelFor< DeviceType >::exec( (IndexType) 0, src.getSize(), kernel, src.getData(), dest.getData(), perm.getData() ); } template< typename Matrix, typename PermutationVector > void reorderMatrix( const Matrix& matrix1, Matrix& matrix2, const PermutationVector& _perm, const PermutationVector& _iperm ) { // TODO: implement on GPU static_assert( std::is_same< typename Matrix::DeviceType, Devices::Host >::value, "matrix reordering is implemented only for host" ); static_assert( std::is_same< typename PermutationVector::DeviceType, Devices::Host >::value, "matrix reordering is implemented only for host" ); using namespace TNL; using IndexType = typename Matrix::IndexType; matrix2.setLike( matrix1 ); // general multidimensional accessors for permutation indices // TODO: this depends on the specific layout of dofs, general reordering of NDArray is needed auto perm = [&]( IndexType dof ) { TNL_ASSERT_LT( dof, matrix1.getRows(), "invalid dof index" ); const IndexType i = dof / _perm.getSize(); return i * _perm.getSize() + _perm[ dof % _perm.getSize() ]; }; auto iperm = [&]( IndexType dof ) { TNL_ASSERT_LT( dof, matrix1.getRows(), "invalid dof index" ); const IndexType i = dof / _iperm.getSize(); return i * _iperm.getSize() + _iperm[ dof % _iperm.getSize() ]; }; // set row lengths typename Matrix::CompressedRowLengthsVector rowLengths; rowLengths.setSize( matrix1.getRows() ); for( IndexType i = 0; i < matrix1.getRows(); i++ ) { const IndexType maxLength = matrix1.getRowLength( perm( i ) ); const auto row = matrix1.getRow( perm( i ) ); IndexType length = 0; for( IndexType j = 0; j < maxLength; j++ ) if( row.getElementColumn( j ) < matrix1.getColumns() ) length++; rowLengths[ i ] = length; } matrix2.setCompressedRowLengths( rowLengths ); // set row elements for( IndexType i = 0; i < matrix2.getRows(); i++ ) { const IndexType rowLength = rowLengths[ i ]; // extract sparse row const auto row1 = matrix1.getRow( perm( i ) ); // permute typename Matrix::IndexType columns[ rowLength ]; typename Matrix::RealType values[ rowLength ]; for( IndexType j = 0; j < rowLength; j++ ) { columns[ j ] = iperm( row1.getElementColumn( j ) ); values[ j ] = row1.getElementValue( j ); } // sort IndexType indices[ rowLength ]; for( IndexType j = 0; j < rowLength; j++ ) indices[ j ] = j; // nvcc does not allow lambdas to capture VLAs, even in host code (WTF!?) // error: a variable captured by a lambda cannot have a type involving a variable-length array IndexType* _columns = columns; auto comparator = [=]( IndexType a, IndexType b ) { return _columns[ a ] < _columns[ b ]; }; std::sort( indices, indices + rowLength, comparator ); typename Matrix::IndexType sortedColumns[ rowLength ]; typename Matrix::RealType sortedValues[ rowLength ]; for( IndexType j = 0; j < rowLength; j++ ) { sortedColumns[ j ] = columns[ indices[ j ] ]; sortedValues[ j ] = values[ indices[ j ] ]; } matrix2.setRow( i, sortedColumns, sortedValues, rowLength ); } } src/Benchmarks/DistSpMV/tnl-benchmark-distributed-spmv.cpp 0 → 100644 +11 −0 Original line number Original line Diff line number Diff line /*************************************************************************** tnl-benchmark-distributed-spmv.cpp - description ------------------- begin : Sep 11, 2018 copyright : (C) 2018 by Tomas Oberhuber et al. email : tomas.oberhuber@fjfi.cvut.cz ***************************************************************************/ /* See Copyright Notice in tnl/Copyright */ #include "tnl-benchmark-distributed-spmv.h" src/Benchmarks/DistSpMV/tnl-benchmark-distributed-spmv.cu 0 → 100644 +11 −0 Original line number Original line Diff line number Diff line /*************************************************************************** tnl-benchmark-distributed-spmv.cu - description ------------------- begin : Sep 11, 2018 copyright : (C) 2018 by Tomas Oberhuber et al. email : tomas.oberhuber@fjfi.cvut.cz ***************************************************************************/ /* See Copyright Notice in tnl/Copyright */ #include "tnl-benchmark-distributed-spmv.h" Loading
src/Benchmarks/CMakeLists.txt +1 −0 Original line number Original line Diff line number Diff line add_subdirectory( HeatEquation ) add_subdirectory( HeatEquation ) add_subdirectory( BLAS ) add_subdirectory( BLAS ) add_subdirectory( SpMV ) add_subdirectory( SpMV ) add_subdirectory( DistSpMV ) add_subdirectory( LinearSolvers ) add_subdirectory( LinearSolvers ) set( headers set( headers Loading
src/Benchmarks/DistSpMV/CMakeLists.txt 0 → 100644 +11 −0 Original line number Original line Diff line number Diff line if( BUILD_CUDA ) cuda_add_executable( tnl-benchmark-distributed-spmv-cuda tnl-benchmark-distributed-spmv.cu ) target_link_libraries( tnl-benchmark-distributed-spmv-cuda tnl ) install( TARGETS tnl-benchmark-distributed-spmv-cuda RUNTIME DESTINATION bin ) endif() add_executable( tnl-benchmark-distributed-spmv tnl-benchmark-distributed-spmv.cpp ) target_link_libraries( tnl-benchmark-distributed-spmv tnl ) install( TARGETS tnl-benchmark-distributed-spmv RUNTIME DESTINATION bin )
src/Benchmarks/DistSpMV/ordering.h 0 → 100644 +133 −0 Original line number Original line Diff line number Diff line #pragma once #include <algorithm> #include <TNL/Devices/Host.h> #include <TNL/ParallelFor.h> using namespace TNL; template< typename Matrix, typename PermutationVector > void getTrivialOrdering( const Matrix& matrix, PermutationVector& perm, PermutationVector& iperm ) { using IndexType = typename Matrix::IndexType; // allocate permutation vectors perm.setSize( matrix.getRows() ); iperm.setSize( matrix.getRows() ); const IndexType N = matrix.getRows() / 2; for( IndexType i = 0; i < N; i++ ) { perm[ 2 * i ] = i; perm[ 2 * i + 1 ] = i + N; iperm[ i ] = 2 * i; iperm[ i + N ] = 2 * i + 1; } } template< typename Vector, typename PermutationVector > void reorderVector( const Vector& src, Vector& dest, const PermutationVector& perm ) { TNL_ASSERT_EQ( src.getSize(), perm.getSize(), "Source vector and permutation must have the same size." ); using RealType = typename Vector::RealType; using DeviceType = typename Vector::DeviceType; using IndexType = typename Vector::IndexType; auto kernel = [] __cuda_callable__ ( IndexType i, const RealType* src, RealType* dest, const typename PermutationVector::RealType* perm ) { dest[ i ] = src[ perm[ i ] ]; }; dest.setLike( src ); ParallelFor< DeviceType >::exec( (IndexType) 0, src.getSize(), kernel, src.getData(), dest.getData(), perm.getData() ); } template< typename Matrix, typename PermutationVector > void reorderMatrix( const Matrix& matrix1, Matrix& matrix2, const PermutationVector& _perm, const PermutationVector& _iperm ) { // TODO: implement on GPU static_assert( std::is_same< typename Matrix::DeviceType, Devices::Host >::value, "matrix reordering is implemented only for host" ); static_assert( std::is_same< typename PermutationVector::DeviceType, Devices::Host >::value, "matrix reordering is implemented only for host" ); using namespace TNL; using IndexType = typename Matrix::IndexType; matrix2.setLike( matrix1 ); // general multidimensional accessors for permutation indices // TODO: this depends on the specific layout of dofs, general reordering of NDArray is needed auto perm = [&]( IndexType dof ) { TNL_ASSERT_LT( dof, matrix1.getRows(), "invalid dof index" ); const IndexType i = dof / _perm.getSize(); return i * _perm.getSize() + _perm[ dof % _perm.getSize() ]; }; auto iperm = [&]( IndexType dof ) { TNL_ASSERT_LT( dof, matrix1.getRows(), "invalid dof index" ); const IndexType i = dof / _iperm.getSize(); return i * _iperm.getSize() + _iperm[ dof % _iperm.getSize() ]; }; // set row lengths typename Matrix::CompressedRowLengthsVector rowLengths; rowLengths.setSize( matrix1.getRows() ); for( IndexType i = 0; i < matrix1.getRows(); i++ ) { const IndexType maxLength = matrix1.getRowLength( perm( i ) ); const auto row = matrix1.getRow( perm( i ) ); IndexType length = 0; for( IndexType j = 0; j < maxLength; j++ ) if( row.getElementColumn( j ) < matrix1.getColumns() ) length++; rowLengths[ i ] = length; } matrix2.setCompressedRowLengths( rowLengths ); // set row elements for( IndexType i = 0; i < matrix2.getRows(); i++ ) { const IndexType rowLength = rowLengths[ i ]; // extract sparse row const auto row1 = matrix1.getRow( perm( i ) ); // permute typename Matrix::IndexType columns[ rowLength ]; typename Matrix::RealType values[ rowLength ]; for( IndexType j = 0; j < rowLength; j++ ) { columns[ j ] = iperm( row1.getElementColumn( j ) ); values[ j ] = row1.getElementValue( j ); } // sort IndexType indices[ rowLength ]; for( IndexType j = 0; j < rowLength; j++ ) indices[ j ] = j; // nvcc does not allow lambdas to capture VLAs, even in host code (WTF!?) // error: a variable captured by a lambda cannot have a type involving a variable-length array IndexType* _columns = columns; auto comparator = [=]( IndexType a, IndexType b ) { return _columns[ a ] < _columns[ b ]; }; std::sort( indices, indices + rowLength, comparator ); typename Matrix::IndexType sortedColumns[ rowLength ]; typename Matrix::RealType sortedValues[ rowLength ]; for( IndexType j = 0; j < rowLength; j++ ) { sortedColumns[ j ] = columns[ indices[ j ] ]; sortedValues[ j ] = values[ indices[ j ] ]; } matrix2.setRow( i, sortedColumns, sortedValues, rowLength ); } }
src/Benchmarks/DistSpMV/tnl-benchmark-distributed-spmv.cpp 0 → 100644 +11 −0 Original line number Original line Diff line number Diff line /*************************************************************************** tnl-benchmark-distributed-spmv.cpp - description ------------------- begin : Sep 11, 2018 copyright : (C) 2018 by Tomas Oberhuber et al. email : tomas.oberhuber@fjfi.cvut.cz ***************************************************************************/ /* See Copyright Notice in tnl/Copyright */ #include "tnl-benchmark-distributed-spmv.h"
src/Benchmarks/DistSpMV/tnl-benchmark-distributed-spmv.cu 0 → 100644 +11 −0 Original line number Original line Diff line number Diff line /*************************************************************************** tnl-benchmark-distributed-spmv.cu - description ------------------- begin : Sep 11, 2018 copyright : (C) 2018 by Tomas Oberhuber et al. email : tomas.oberhuber@fjfi.cvut.cz ***************************************************************************/ /* See Copyright Notice in tnl/Copyright */ #include "tnl-benchmark-distributed-spmv.h"