Loading src/TNL/Containers/Segments/Ellpack.hpp +5 −5 Original line number Diff line number Diff line Loading @@ -306,7 +306,7 @@ void Ellpack< Device, Index, IndexAllocator, RowMajorOrder, Alignment >:: segmentsReduction( IndexType first, IndexType last, Fetch& fetch, Reduction& reduction, ResultKeeper& keeper, const Real& zero, Args... args ) const { using RealType = decltype( fetch( IndexType(), IndexType(), std::declval< bool& >(), args... ) ); using RealType = decltype( fetch( IndexType(), IndexType(), IndexType(), std::declval< bool& >(), args... ) ); if( RowMajorOrder ) { const IndexType segmentSize = this->segmentSize; Loading @@ -315,8 +315,8 @@ segmentsReduction( IndexType first, IndexType last, Fetch& fetch, Reduction& red const IndexType end = begin + segmentSize; RealType aux( zero ); bool compute( true ); for( IndexType j = begin; j < end && compute; j++ ) reduction( aux, fetch( i, j, compute, args... ) ); for( IndexType j = begin, localIdx = 0; j < end && compute; j++, localIdx++ ) reduction( aux, fetch( i, localIdx, j, compute, args... ) ); keeper( i, aux ); }; Algorithms::ParallelFor< Device >::exec( first, last, l, args... ); Loading @@ -330,8 +330,8 @@ segmentsReduction( IndexType first, IndexType last, Fetch& fetch, Reduction& red const IndexType end = storageSize; RealType aux( zero ); bool compute( true ); for( IndexType j = begin; j < end && compute; j += alignedSize ) reduction( aux, fetch( i, j, compute, args... ) ); for( IndexType j = begin, localIdx = 0; j < end && compute; j += alignedSize, localIdx++ ) reduction( aux, fetch( i, localIdx, j, compute, args... ) ); keeper( i, aux ); }; Algorithms::ParallelFor< Device >::exec( first, last, l, args... ); Loading src/TNL/Matrices/Dense.h +2 −2 Original line number Diff line number Diff line Loading @@ -48,8 +48,8 @@ class Dense : public Matrix< Real, Device, Index > using ValuesViewType = typename ValuesType::ViewType; using SegmentsType = Containers::Segments::Ellpack< DeviceType, IndexType, typename Allocators::Default< Device >::template Allocator< IndexType >, RowMajorOrder, 1 >; using SegmentViewType = typename SegmentsType::SegmentViewType; using ViewType = DenseMatrixView< Real, Device, Index, MatrixType, SegmentsViewTemplate >; using ConstViewType = DenseMatrixView< typename std::add_const< Real >::type, Device, Index, MatrixType, SegmentsViewTemplate >; using ViewType = DenseMatrixView< Real, Device, Index, RowMajorOrder >; using ConstViewType = DenseMatrixView< typename std::add_const< Real >::type, Device, Index, RowMajorOrder >; using RowView = DenseMatrixRowView< SegmentViewType, ValuesViewType >; // TODO: remove this Loading src/TNL/Matrices/Dense.hpp +19 −53 Original line number Diff line number Diff line Loading @@ -41,10 +41,9 @@ template< typename Real, typename Device, typename Index, bool RowMajorOrder, template< typename, typename, typename > class Segments, typename RealAllocator > auto Dense< Real, Device, Index, RowMajorOrder, Segments, RealAllocator >:: Dense< Real, Device, Index, RowMajorOrder, RealAllocator >:: getView() -> ViewType { return ViewType( this->getRows(), Loading @@ -57,10 +56,9 @@ template< typename Real, typename Device, typename Index, bool RowMajorOrder, template< typename, typename, typename > class Segments, typename RealAllocator > auto Dense< Real, Device, Index, RowMajorOrder, Segments, RealAllocator >:: Dense< Real, Device, Index, RowMajorOrder, RealAllocator >:: getConstView() const -> ConstViewType { return ConstViewType( this->getRows(), Loading Loading @@ -451,8 +449,9 @@ template< typename Real, typename RealAllocator > template< typename InVector, typename OutVector > void Dense< Real, Device, Index, RowMajorOrder, RealAllocator >::vectorProduct( const InVector& inVector, OutVector& outVector ) const void Dense< Real, Device, Index, RowMajorOrder, RealAllocator >:: vectorProduct( const InVector& inVector, OutVector& outVector ) const { TNL_ASSERT( this->getColumns() == inVector.getSize(), std::cerr << "Matrix columns: " << this->getColumns() << std::endl Loading @@ -461,7 +460,20 @@ void Dense< Real, Device, Index, RowMajorOrder, RealAllocator >::vectorProduct( std::cerr << "Matrix rows: " << this->getRows() << std::endl << "Vector size: " << outVector.getSize() << std::endl ); DeviceDependentCode::vectorProduct( *this, inVector, outVector ); //DeviceDependentCode::vectorProduct( *this, inVector, outVector ); const auto inVectorView = inVector.getConstView(); auto outVectorView = outVector.getView(); const auto valuesView = this->values.getConstView(); auto fetch = [=] __cuda_callable__ ( IndexType row, IndexType column, IndexType offset, bool& compute ) -> RealType { return valuesView[ offset ] * inVectorView[ column ]; }; auto reduction = [] __cuda_callable__ ( RealType& sum, const RealType& value ) { sum += value; }; auto keeper = [=] __cuda_callable__ ( IndexType row, const RealType& value ) mutable { outVectorView[ row ] = value; }; this->segments.segmentsReduction( 0, this->getRows(), fetch, reduction, keeper, ( RealType ) 0.0 ); } template< typename Real, Loading Loading @@ -1051,51 +1063,5 @@ Index Dense< Real, Device, Index, RowMajorOrder, RealAllocator >::getElementInde return this->segments.getGlobalIndex( row, column ); } template<> class DenseDeviceDependentCode< Devices::Host > { public: typedef Devices::Host Device; template< typename Real, typename Index, bool RowMajorOrder, typename RealAllocator, typename InVector, typename OutVector > static void vectorProduct( const Dense< Real, Device, Index, RowMajorOrder, RealAllocator >& matrix, const InVector& inVector, OutVector& outVector ) { #ifdef HAVE_OPENMP #pragma omp parallel for if( Devices::Host::isOMPEnabled() ) #endif for( Index row = 0; row < matrix.getRows(); row ++ ) outVector[ row ] = matrix.rowVectorProduct( row, inVector ); } }; template<> class DenseDeviceDependentCode< Devices::Cuda > { public: typedef Devices::Cuda Device; template< typename Real, typename Index, bool RowMajorOrder, typename RealAllocator, typename InVector, typename OutVector > static void vectorProduct( const Dense< Real, Device, Index, RowMajorOrder, RealAllocator >& matrix, const InVector& inVector, OutVector& outVector ) { MatrixVectorProductCuda( matrix, inVector, outVector ); } }; } // namespace Matrices } // namespace TNL src/TNL/Matrices/DenseMatrixView.h +9 −9 Original line number Diff line number Diff line Loading @@ -14,14 +14,11 @@ #include <TNL/Devices/Host.h> #include <TNL/Matrices/DenseMatrixRowView.h> #include <TNL/Matrices/MatrixView.h> #include <TNL/Containers/Segments/EllpackView.h> #include <TNL/Containers/Segments/Ellpack.h> namespace TNL { namespace Matrices { //template< typename Device > //class DenseDeviceDependentCode; template< typename Real = double, typename Device = Devices::Host, typename Index = int, Loading @@ -48,6 +45,9 @@ class DenseMatrixView : public MatrixView< Real, Device, Index > using SegmentsViewType = typename SegmentsType::ViewType; using SegmentViewType = typename SegmentsType::SegmentViewType; using RowView = DenseMatrixRowView< SegmentViewType, ValuesViewType >; using ViewType = DenseMatrixView< Real, Device, Index, RowMajorOrder >; using ConstViewType = DenseMatrixView< typename std::add_const< Real >::type, Device, Index, RowMajorOrder >; // TODO: remove this using CompressedRowLengthsVector = typename Matrix< Real, Device, Index >::CompressedRowLengthsVector; Loading @@ -56,7 +56,7 @@ class DenseMatrixView : public MatrixView< Real, Device, Index > template< typename _Real = Real, typename _Device = Device, typename _Index = Index > using Self = Dense< _Real, _Device, _Index >; using Self = DenseMatrixView< _Real, _Device, _Index >; __cuda_callable__ DenseMatrixView(); Loading Loading @@ -172,12 +172,12 @@ class DenseMatrixView : public MatrixView< Real, Device, Index > const RealType& omega = 1.0 ) const; // copy assignment Dense& operator=( const Dense& matrix ); DenseMatrixView& operator=( const DenseMatrixView& matrix ); // cross-device copy assignment template< typename Real2, typename Device2, typename Index2, typename = typename Enabler< Device2 >::type > Dense& operator=( const Dense< Real2, Device2, Index2 >& matrix ); DenseMatrixView& operator=( const DenseMatrixView< Real2, Device2, Index2 >& matrix ); void save( const String& fileName ) const; Loading @@ -195,8 +195,8 @@ class DenseMatrixView : public MatrixView< Real, Device, Index > IndexType getElementIndex( const IndexType row, const IndexType column ) const; typedef DenseDeviceDependentCode< DeviceType > DeviceDependentCode; friend class DenseDeviceDependentCode< DeviceType >; //typedef DenseDeviceDependentCode< DeviceType > DeviceDependentCode; //friend class DenseDeviceDependentCode< DeviceType >; SegmentsViewType segments; }; Loading src/TNL/Matrices/DenseMatrixView.hpp +61 −101 File changed.Preview size limit exceeded, changes collapsed. Show changes Loading
src/TNL/Containers/Segments/Ellpack.hpp +5 −5 Original line number Diff line number Diff line Loading @@ -306,7 +306,7 @@ void Ellpack< Device, Index, IndexAllocator, RowMajorOrder, Alignment >:: segmentsReduction( IndexType first, IndexType last, Fetch& fetch, Reduction& reduction, ResultKeeper& keeper, const Real& zero, Args... args ) const { using RealType = decltype( fetch( IndexType(), IndexType(), std::declval< bool& >(), args... ) ); using RealType = decltype( fetch( IndexType(), IndexType(), IndexType(), std::declval< bool& >(), args... ) ); if( RowMajorOrder ) { const IndexType segmentSize = this->segmentSize; Loading @@ -315,8 +315,8 @@ segmentsReduction( IndexType first, IndexType last, Fetch& fetch, Reduction& red const IndexType end = begin + segmentSize; RealType aux( zero ); bool compute( true ); for( IndexType j = begin; j < end && compute; j++ ) reduction( aux, fetch( i, j, compute, args... ) ); for( IndexType j = begin, localIdx = 0; j < end && compute; j++, localIdx++ ) reduction( aux, fetch( i, localIdx, j, compute, args... ) ); keeper( i, aux ); }; Algorithms::ParallelFor< Device >::exec( first, last, l, args... ); Loading @@ -330,8 +330,8 @@ segmentsReduction( IndexType first, IndexType last, Fetch& fetch, Reduction& red const IndexType end = storageSize; RealType aux( zero ); bool compute( true ); for( IndexType j = begin; j < end && compute; j += alignedSize ) reduction( aux, fetch( i, j, compute, args... ) ); for( IndexType j = begin, localIdx = 0; j < end && compute; j += alignedSize, localIdx++ ) reduction( aux, fetch( i, localIdx, j, compute, args... ) ); keeper( i, aux ); }; Algorithms::ParallelFor< Device >::exec( first, last, l, args... ); Loading
src/TNL/Matrices/Dense.h +2 −2 Original line number Diff line number Diff line Loading @@ -48,8 +48,8 @@ class Dense : public Matrix< Real, Device, Index > using ValuesViewType = typename ValuesType::ViewType; using SegmentsType = Containers::Segments::Ellpack< DeviceType, IndexType, typename Allocators::Default< Device >::template Allocator< IndexType >, RowMajorOrder, 1 >; using SegmentViewType = typename SegmentsType::SegmentViewType; using ViewType = DenseMatrixView< Real, Device, Index, MatrixType, SegmentsViewTemplate >; using ConstViewType = DenseMatrixView< typename std::add_const< Real >::type, Device, Index, MatrixType, SegmentsViewTemplate >; using ViewType = DenseMatrixView< Real, Device, Index, RowMajorOrder >; using ConstViewType = DenseMatrixView< typename std::add_const< Real >::type, Device, Index, RowMajorOrder >; using RowView = DenseMatrixRowView< SegmentViewType, ValuesViewType >; // TODO: remove this Loading
src/TNL/Matrices/Dense.hpp +19 −53 Original line number Diff line number Diff line Loading @@ -41,10 +41,9 @@ template< typename Real, typename Device, typename Index, bool RowMajorOrder, template< typename, typename, typename > class Segments, typename RealAllocator > auto Dense< Real, Device, Index, RowMajorOrder, Segments, RealAllocator >:: Dense< Real, Device, Index, RowMajorOrder, RealAllocator >:: getView() -> ViewType { return ViewType( this->getRows(), Loading @@ -57,10 +56,9 @@ template< typename Real, typename Device, typename Index, bool RowMajorOrder, template< typename, typename, typename > class Segments, typename RealAllocator > auto Dense< Real, Device, Index, RowMajorOrder, Segments, RealAllocator >:: Dense< Real, Device, Index, RowMajorOrder, RealAllocator >:: getConstView() const -> ConstViewType { return ConstViewType( this->getRows(), Loading Loading @@ -451,8 +449,9 @@ template< typename Real, typename RealAllocator > template< typename InVector, typename OutVector > void Dense< Real, Device, Index, RowMajorOrder, RealAllocator >::vectorProduct( const InVector& inVector, OutVector& outVector ) const void Dense< Real, Device, Index, RowMajorOrder, RealAllocator >:: vectorProduct( const InVector& inVector, OutVector& outVector ) const { TNL_ASSERT( this->getColumns() == inVector.getSize(), std::cerr << "Matrix columns: " << this->getColumns() << std::endl Loading @@ -461,7 +460,20 @@ void Dense< Real, Device, Index, RowMajorOrder, RealAllocator >::vectorProduct( std::cerr << "Matrix rows: " << this->getRows() << std::endl << "Vector size: " << outVector.getSize() << std::endl ); DeviceDependentCode::vectorProduct( *this, inVector, outVector ); //DeviceDependentCode::vectorProduct( *this, inVector, outVector ); const auto inVectorView = inVector.getConstView(); auto outVectorView = outVector.getView(); const auto valuesView = this->values.getConstView(); auto fetch = [=] __cuda_callable__ ( IndexType row, IndexType column, IndexType offset, bool& compute ) -> RealType { return valuesView[ offset ] * inVectorView[ column ]; }; auto reduction = [] __cuda_callable__ ( RealType& sum, const RealType& value ) { sum += value; }; auto keeper = [=] __cuda_callable__ ( IndexType row, const RealType& value ) mutable { outVectorView[ row ] = value; }; this->segments.segmentsReduction( 0, this->getRows(), fetch, reduction, keeper, ( RealType ) 0.0 ); } template< typename Real, Loading Loading @@ -1051,51 +1063,5 @@ Index Dense< Real, Device, Index, RowMajorOrder, RealAllocator >::getElementInde return this->segments.getGlobalIndex( row, column ); } template<> class DenseDeviceDependentCode< Devices::Host > { public: typedef Devices::Host Device; template< typename Real, typename Index, bool RowMajorOrder, typename RealAllocator, typename InVector, typename OutVector > static void vectorProduct( const Dense< Real, Device, Index, RowMajorOrder, RealAllocator >& matrix, const InVector& inVector, OutVector& outVector ) { #ifdef HAVE_OPENMP #pragma omp parallel for if( Devices::Host::isOMPEnabled() ) #endif for( Index row = 0; row < matrix.getRows(); row ++ ) outVector[ row ] = matrix.rowVectorProduct( row, inVector ); } }; template<> class DenseDeviceDependentCode< Devices::Cuda > { public: typedef Devices::Cuda Device; template< typename Real, typename Index, bool RowMajorOrder, typename RealAllocator, typename InVector, typename OutVector > static void vectorProduct( const Dense< Real, Device, Index, RowMajorOrder, RealAllocator >& matrix, const InVector& inVector, OutVector& outVector ) { MatrixVectorProductCuda( matrix, inVector, outVector ); } }; } // namespace Matrices } // namespace TNL
src/TNL/Matrices/DenseMatrixView.h +9 −9 Original line number Diff line number Diff line Loading @@ -14,14 +14,11 @@ #include <TNL/Devices/Host.h> #include <TNL/Matrices/DenseMatrixRowView.h> #include <TNL/Matrices/MatrixView.h> #include <TNL/Containers/Segments/EllpackView.h> #include <TNL/Containers/Segments/Ellpack.h> namespace TNL { namespace Matrices { //template< typename Device > //class DenseDeviceDependentCode; template< typename Real = double, typename Device = Devices::Host, typename Index = int, Loading @@ -48,6 +45,9 @@ class DenseMatrixView : public MatrixView< Real, Device, Index > using SegmentsViewType = typename SegmentsType::ViewType; using SegmentViewType = typename SegmentsType::SegmentViewType; using RowView = DenseMatrixRowView< SegmentViewType, ValuesViewType >; using ViewType = DenseMatrixView< Real, Device, Index, RowMajorOrder >; using ConstViewType = DenseMatrixView< typename std::add_const< Real >::type, Device, Index, RowMajorOrder >; // TODO: remove this using CompressedRowLengthsVector = typename Matrix< Real, Device, Index >::CompressedRowLengthsVector; Loading @@ -56,7 +56,7 @@ class DenseMatrixView : public MatrixView< Real, Device, Index > template< typename _Real = Real, typename _Device = Device, typename _Index = Index > using Self = Dense< _Real, _Device, _Index >; using Self = DenseMatrixView< _Real, _Device, _Index >; __cuda_callable__ DenseMatrixView(); Loading Loading @@ -172,12 +172,12 @@ class DenseMatrixView : public MatrixView< Real, Device, Index > const RealType& omega = 1.0 ) const; // copy assignment Dense& operator=( const Dense& matrix ); DenseMatrixView& operator=( const DenseMatrixView& matrix ); // cross-device copy assignment template< typename Real2, typename Device2, typename Index2, typename = typename Enabler< Device2 >::type > Dense& operator=( const Dense< Real2, Device2, Index2 >& matrix ); DenseMatrixView& operator=( const DenseMatrixView< Real2, Device2, Index2 >& matrix ); void save( const String& fileName ) const; Loading @@ -195,8 +195,8 @@ class DenseMatrixView : public MatrixView< Real, Device, Index > IndexType getElementIndex( const IndexType row, const IndexType column ) const; typedef DenseDeviceDependentCode< DeviceType > DeviceDependentCode; friend class DenseDeviceDependentCode< DeviceType >; //typedef DenseDeviceDependentCode< DeviceType > DeviceDependentCode; //friend class DenseDeviceDependentCode< DeviceType >; SegmentsViewType segments; }; Loading
src/TNL/Matrices/DenseMatrixView.hpp +61 −101 File changed.Preview size limit exceeded, changes collapsed. Show changes