    /***************************************************************************
    
                              VectorOperationsHost_impl.h  -  description
    
                                 -------------------
        begin                : Nov 8, 2012
        copyright            : (C) 2012 by Tomas Oberhuber
        email                : tomas.oberhuber@fjfi.cvut.cz
     ***************************************************************************/
    
    
    /* See Copyright Notice in tnl/Copyright */
    
    #include <TNL/Math.h>
    
    #include <TNL/Containers/Algorithms/VectorOperations.h>
    
    
    namespace TNL {
    namespace Containers {
    namespace Algorithms {
    
    template< typename Vector >
    void
    VectorOperations< Devices::Host >::
    addElement( Vector& v,
                const typename Vector::IndexType i,
                const typename Vector::RealType& value )
    {
       // Add the given value to the i-th element of v.
       v[ i ] += value;
    }
    
    template< typename Vector, typename Scalar >
    void
    VectorOperations< Devices::Host >::
    addElement( Vector& v,
                const typename Vector::IndexType i,
                const typename Vector::RealType& value,
                const Scalar thisElementMultiplicator )
    {
       // Scale the i-th element by thisElementMultiplicator, then add the given value.
       v[ i ] = thisElementMultiplicator * v[ i ] + value;
    }
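    
    // Usage sketch (illustrative only; assumes a host vector of doubles named v,
    // e.g. Containers::Vector< double, Devices::Host >, with at least four elements):
    //
    //    v.setValue( 1.0 );
    //    VectorOperations< Devices::Host >::addElement( v, 3, 2.0 );       // v[ 3 ] == 3.0
    //    VectorOperations< Devices::Host >::addElement( v, 3, 2.0, 0.5 );  // v[ 3 ] == 0.5 * 3.0 + 2.0 = 3.5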
    
    
    template< typename Vector, typename ResultType >
    ResultType
    VectorOperations< Devices::Host >::
    getVectorSum( const Vector& v )
    {
       TNL_ASSERT_GT( v.getSize(), 0, "Vector size must be positive." );
    
       // A bool cannot accumulate a sum, so reject it as the result type.
       if( std::is_same< ResultType, bool >::value )
          abort();
    
       using RealType = typename Vector::RealType;
       using IndexType = typename Vector::IndexType;
    
       const auto* data = v.getData();
       // fetch reads the i-th element; reduction accumulates partial results;
       // volatileReduction performs the same operation on volatile references,
       // as required by the Reduction interface.
       auto fetch = [=] __cuda_callable__ ( IndexType i ) -> ResultType { return data[ i ]; };
       auto reduction = [=] __cuda_callable__ ( ResultType& a, const ResultType& b ) { a += b; };
       auto volatileReduction = [=] __cuda_callable__ ( volatile ResultType& a, volatile ResultType& b ) { a += b; };
       return Reduction< Devices::Host >::reduce( v.getSize(), reduction, volatileReduction, fetch, ( ResultType ) 0 );
    }
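    
    // Usage sketch (illustrative only; the result type is not deduced, so both
    // template arguments are spelled out here):
    //
    //    Containers::Vector< double, Devices::Host > v( 5 );
    //    v.setValue( 2.0 );
    //    const double sum = VectorOperations< Devices::Host >::getVectorSum< decltype( v ), double >( v );
    //    // sum == 10.0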
    
    template< Algorithms::PrefixSumType Type, typename Vector >
    void
    VectorOperations< Devices::Host >::
    prefixSum( Vector& v,
               typename Vector::IndexType begin,
               typename Vector::IndexType end )
    {
       using RealType = typename Vector::RealType;
       using IndexType = typename Vector::IndexType;
    
       // In-place scan over the elements in [ begin, end ): partial sums are
       // accumulated with += and zero is used as the identity element.
       auto reduction = [=] __cuda_callable__ ( RealType& a, const RealType& b ) { a += b; };
       auto volatileReduction = [=] __cuda_callable__ ( volatile RealType& a, volatile RealType& b ) { a += b; };
    
       PrefixSum< Devices::Host, Type >::perform( v, begin, end, reduction, volatileReduction, ( RealType ) 0.0 );
    }
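    
    // Usage sketch (illustrative only; assumes an inclusive scan and that the
    // enumerator is spelled Algorithms::PrefixSumType::Inclusive):
    //
    //    // v = { 1, 2, 3, 4 }
    //    VectorOperations< Devices::Host >::prefixSum< Algorithms::PrefixSumType::Inclusive >( v, 0, v.getSize() );
    //    // v = { 1, 3, 6, 10 }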
    
    template< Algorithms::PrefixSumType Type, typename Vector, typename Flags >
    void
    VectorOperations< Devices::Host >::
    segmentedPrefixSum( Vector& v,
                        Flags& f,
                        typename Vector::IndexType begin,
                        typename Vector::IndexType end )
    {
       using RealType = typename Vector::RealType;
       using IndexType = typename Vector::IndexType;
    
       // Segmented scan: the flags vector f marks the first element of each
       // segment, where the running sum is restarted from zero.
       auto reduction = [=] __cuda_callable__ ( RealType& a, const RealType& b ) { a += b; };
       auto volatileReduction = [=] __cuda_callable__ ( volatile RealType& a, volatile RealType& b ) { a += b; };
    
       SegmentedPrefixSum< Devices::Host, Type >::perform( v, f, begin, end, reduction, volatileReduction, ( RealType ) 0.0 );
    }
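    
    // Usage sketch (illustrative only; flag semantics as in a standard segmented
    // scan, where a non-zero flag marks the start of a new segment):
    //
    //    // v = { 1, 2, 3, 4, 5 },  f = { 1, 0, 0, 1, 0 }
    //    // an inclusive segmented prefix sum rewrites v as { 1, 3, 6, 4, 9 }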
    
    } // namespace Algorithms
    } // namespace Containers
    } // namespace TNL