Loading src/core/implementation/tnlSharedVector_impl.h +26 −1 Original line number Diff line number Diff line Loading @@ -227,8 +227,33 @@ void tnlSharedVector< Real, Device, Index > :: saxpsby( const Real& alpha, const Vector& x, const Real& beta ) { vectorSaxpsbz( *this, x, alpha, beta ); vectorSaxpsby( *this, x, alpha, beta ); } template< typename Real, typename Device, typename Index > template< typename Vector > void tnlSharedVector< Real, Device, Index > :: saxpsbz( const Real& alpha, const Vector& x, const Real& beta, const Vector& z ) { vectorSaxpsbz( *this, x, alpha, z, beta ); } template< typename Real, typename Device, typename Index > template< typename Vector > void tnlSharedVector< Real, Device, Index > :: saxpsbzpy( const Real& alpha, const Vector& x, const Real& beta, const Vector& z ) { vectorSaxpsbz( *this, x, alpha, z, beta ); } #endif /* TNLSHAREDVECTOR_H_IMPLEMENTATION */ src/core/implementation/tnlVector_impl.h +26 −0 Original line number Diff line number Diff line Loading @@ -255,4 +255,30 @@ void tnlVector< Real, Device, Index > :: saxpsby( const Real& alpha, vectorSaxpsby( *this, x, alpha, beta ); } template< typename Real, typename Device, typename Index > template< typename Vector > void tnlVector< Real, Device, Index > :: saxpsbz( const Real& alpha, const Vector& x, const Real& beta, const Vector& z ) { vectorSaxpsbz( *this, x, alpha, z, beta ); } template< typename Real, typename Device, typename Index > template< typename Vector > void tnlVector< Real, Device, Index > :: saxpsbzpy( const Real& alpha, const Vector& x, const Real& beta, const Vector& z ) { vectorSaxpsbzpy( *this, x, alpha, z, beta ); } #endif /* TNLVECTOR_H_IMPLEMENTATION */ src/core/implementation/vector-operations.h +140 −1 Original line number Diff line number Diff line Loading @@ -871,7 +871,6 @@ void vectorSaxmy( Vector1& y, } } template< typename Vector1, typename Vector2 > void hostVectorSaxpsby( Vector1& y, const Vector2& x, Loading Loading @@ -936,4 +935,144 @@ void vectorSaxpsby( Vector1& y, break; } } template< typename Vector1, typename Vector2 > void hostVectorSaxpsbz( Vector1& y, const Vector2& x, const typename Vector1 :: RealType& alpha, const Vector2& z, const typename Vector1 :: RealType& beta ) { typedef typename Vector1 :: RealType Real; typedef typename Vector1 :: IndexType Index; const Index n = y. getSize(); for( Index i = 0; i < n; i ++ ) y[ i ] = alpha * x[ i ] + beta * z[ i ]; } template< typename Vector1, typename Vector2 > void cudaVectorSaxpsbz( Vector1& y, const Vector2& x, const typename Vector1 :: RealType& alpha, const Vector2& z, const typename Vector1 :: RealType& beta ) { typedef typename Vector1 :: RealType Real; typedef typename Vector1 :: IndexType Index; #ifdef HAVE_CUDA dim3 blockSize, gridSize; blockSize. x = 512; gridSize. x = x. getSize() / 512 + 1; tnlVectorCUDASaxpsbzKernel<<< gridSize, blockSize >>>( y. getSize(), alpha, x. getData(), beta, z. getData() ); #else cerr << "I am sorry but CUDA support is missing on this system " << __FILE__ << " line " << __LINE__ << "." << endl; #endif } template< typename Vector1, typename Vector2 > void vectorSaxpsbz( Vector1& y, const Vector2& x, const typename Vector1 :: RealType& alpha, const Vector2& z, const typename Vector1 :: RealType& beta ) { typedef typename Vector1 :: DeviceType Device1; typedef typename Vector2 :: DeviceType Device2; tnlAssert( y. getSize() > 0, cerr << "Vector name is " << v1. getName() ); tnlAssert( y. getSize() == x. getSize(), cerr << "Vector names are " << x. getName() << " and " << y. getName() ); tnlAssert( Device1 :: getDevice() == Device2 :: getDevice(), cerr << "Vector names are " << y. getName() << " and " << x. getName() ); switch( Device1 :: getDevice() ) { case tnlHostDevice: return hostVectorSaxpsbz( y, x, alpha, z, beta ); break; case tnlCudaDevice: return cudaVectorSaxpsbz( y, x, alpha, z, beta ); break; } } template< typename Vector1, typename Vector2 > void hostVectorSaxpsbzpy( Vector1& y, const Vector2& x, const typename Vector1 :: RealType& alpha, const Vector2& z, const typename Vector1 :: RealType& beta ) { typedef typename Vector1 :: RealType Real; typedef typename Vector1 :: IndexType Index; const Index n = y. getSize(); for( Index i = 0; i < n; i ++ ) y[ i ] += alpha * x[ i ] + beta * z[ i ]; } template< typename Vector1, typename Vector2 > void cudaVectorSaxpsbzpy( Vector1& y, const Vector2& x, const typename Vector1 :: RealType& alpha, const Vector2& z, const typename Vector1 :: RealType& beta ) { typedef typename Vector1 :: RealType Real; typedef typename Vector1 :: IndexType Index; #ifdef HAVE_CUDA dim3 blockSize, gridSize; blockSize. x = 512; gridSize. x = x. getSize() / 512 + 1; tnlVectorCUDASaxpsbzpyKernel<<< gridSize, blockSize >>>( y. getSize(), alpha, x. getData(), beta, z. getData() ); #else cerr << "I am sorry but CUDA support is missing on this system " << __FILE__ << " line " << __LINE__ << "." << endl; #endif } template< typename Vector1, typename Vector2 > void vectorSaxpsbzpy( Vector1& y, const Vector2& x, const typename Vector1 :: RealType& alpha, const Vector2& z, const typename Vector1 :: RealType& beta ) { typedef typename Vector1 :: DeviceType Device1; typedef typename Vector2 :: DeviceType Device2; tnlAssert( y. getSize() > 0, cerr << "Vector name is " << v1. getName() ); tnlAssert( y. getSize() == x. getSize(), cerr << "Vector names are " << x. getName() << " and " << y. getName() ); tnlAssert( Device1 :: getDevice() == Device2 :: getDevice(), cerr << "Vector names are " << y. getName() << " and " << x. getName() ); switch( Device1 :: getDevice() ) { case tnlHostDevice: return hostVectorSaxpsbzpy( y, x, alpha, z, beta ); break; case tnlCudaDevice: return cudaVectorSaxpsbzpy( y, x, alpha, z, beta ); break; } } #endif /* VECTOROPERATIONS_H_ */ src/core/tnlSharedVector.h +22 −0 Original line number Diff line number Diff line Loading @@ -103,6 +103,28 @@ class tnlSharedVector : public tnlSharedArray< Real, Device, Index > void saxpsby( const Real& alpha, const Vector& x, const Real& beta ); //! Computes Y = Scalar Alpha X Plus Scalar Beta Z /*!** * It is not standard BLAS function as well. */ template< typename Vector > void saxpsbz( const Real& alpha, const Vector& x, const Real& beta, const Vector& z ); //! Computes Y = Scalar Alpha X Plus Scalar Beta Z Plus Y /*!** * It is not standard BLAS function as well. */ template< typename Vector > void saxpsbzpy( const Real& alpha, const Vector& x, const Real& beta, const Vector& z ); }; #include <core/implementation/tnlSharedVector_impl.h> Loading src/core/tnlVector.h +20 −0 Original line number Diff line number Diff line Loading @@ -109,6 +109,26 @@ class tnlVector : public tnlArray< Real, Device, Index > void saxpsby( const Real& alpha, const Vector& x, const Real& beta ); //! Computes Y = Scalar Alpha X Plus Scalar Beta Z /*!** * It is not standard BLAS function as well. */ template< typename Vector > void saxpsbz( const Real& alpha, const Vector& x, const Real& beta, const Vector& z ); //! Computes Y = Scalar Alpha X Plus Scalar Beta Z Plus Y /*!** * It is not standard BLAS function as well. */ template< typename Vector > void saxpsbzpy( const Real& alpha, const Vector& x, const Real& beta, const Vector& z ); }; #include <core/implementation/tnlVector_impl.h> Loading Loading
src/core/implementation/tnlSharedVector_impl.h +26 −1 Original line number Diff line number Diff line Loading @@ -227,8 +227,33 @@ void tnlSharedVector< Real, Device, Index > :: saxpsby( const Real& alpha, const Vector& x, const Real& beta ) { vectorSaxpsbz( *this, x, alpha, beta ); vectorSaxpsby( *this, x, alpha, beta ); } template< typename Real, typename Device, typename Index > template< typename Vector > void tnlSharedVector< Real, Device, Index > :: saxpsbz( const Real& alpha, const Vector& x, const Real& beta, const Vector& z ) { vectorSaxpsbz( *this, x, alpha, z, beta ); } template< typename Real, typename Device, typename Index > template< typename Vector > void tnlSharedVector< Real, Device, Index > :: saxpsbzpy( const Real& alpha, const Vector& x, const Real& beta, const Vector& z ) { vectorSaxpsbz( *this, x, alpha, z, beta ); } #endif /* TNLSHAREDVECTOR_H_IMPLEMENTATION */
src/core/implementation/tnlVector_impl.h +26 −0 Original line number Diff line number Diff line Loading @@ -255,4 +255,30 @@ void tnlVector< Real, Device, Index > :: saxpsby( const Real& alpha, vectorSaxpsby( *this, x, alpha, beta ); } template< typename Real, typename Device, typename Index > template< typename Vector > void tnlVector< Real, Device, Index > :: saxpsbz( const Real& alpha, const Vector& x, const Real& beta, const Vector& z ) { vectorSaxpsbz( *this, x, alpha, z, beta ); } template< typename Real, typename Device, typename Index > template< typename Vector > void tnlVector< Real, Device, Index > :: saxpsbzpy( const Real& alpha, const Vector& x, const Real& beta, const Vector& z ) { vectorSaxpsbzpy( *this, x, alpha, z, beta ); } #endif /* TNLVECTOR_H_IMPLEMENTATION */
src/core/implementation/vector-operations.h +140 −1 Original line number Diff line number Diff line Loading @@ -871,7 +871,6 @@ void vectorSaxmy( Vector1& y, } } template< typename Vector1, typename Vector2 > void hostVectorSaxpsby( Vector1& y, const Vector2& x, Loading Loading @@ -936,4 +935,144 @@ void vectorSaxpsby( Vector1& y, break; } } template< typename Vector1, typename Vector2 > void hostVectorSaxpsbz( Vector1& y, const Vector2& x, const typename Vector1 :: RealType& alpha, const Vector2& z, const typename Vector1 :: RealType& beta ) { typedef typename Vector1 :: RealType Real; typedef typename Vector1 :: IndexType Index; const Index n = y. getSize(); for( Index i = 0; i < n; i ++ ) y[ i ] = alpha * x[ i ] + beta * z[ i ]; } template< typename Vector1, typename Vector2 > void cudaVectorSaxpsbz( Vector1& y, const Vector2& x, const typename Vector1 :: RealType& alpha, const Vector2& z, const typename Vector1 :: RealType& beta ) { typedef typename Vector1 :: RealType Real; typedef typename Vector1 :: IndexType Index; #ifdef HAVE_CUDA dim3 blockSize, gridSize; blockSize. x = 512; gridSize. x = x. getSize() / 512 + 1; tnlVectorCUDASaxpsbzKernel<<< gridSize, blockSize >>>( y. getSize(), alpha, x. getData(), beta, z. getData() ); #else cerr << "I am sorry but CUDA support is missing on this system " << __FILE__ << " line " << __LINE__ << "." << endl; #endif } template< typename Vector1, typename Vector2 > void vectorSaxpsbz( Vector1& y, const Vector2& x, const typename Vector1 :: RealType& alpha, const Vector2& z, const typename Vector1 :: RealType& beta ) { typedef typename Vector1 :: DeviceType Device1; typedef typename Vector2 :: DeviceType Device2; tnlAssert( y. getSize() > 0, cerr << "Vector name is " << v1. getName() ); tnlAssert( y. getSize() == x. getSize(), cerr << "Vector names are " << x. getName() << " and " << y. getName() ); tnlAssert( Device1 :: getDevice() == Device2 :: getDevice(), cerr << "Vector names are " << y. getName() << " and " << x. getName() ); switch( Device1 :: getDevice() ) { case tnlHostDevice: return hostVectorSaxpsbz( y, x, alpha, z, beta ); break; case tnlCudaDevice: return cudaVectorSaxpsbz( y, x, alpha, z, beta ); break; } } template< typename Vector1, typename Vector2 > void hostVectorSaxpsbzpy( Vector1& y, const Vector2& x, const typename Vector1 :: RealType& alpha, const Vector2& z, const typename Vector1 :: RealType& beta ) { typedef typename Vector1 :: RealType Real; typedef typename Vector1 :: IndexType Index; const Index n = y. getSize(); for( Index i = 0; i < n; i ++ ) y[ i ] += alpha * x[ i ] + beta * z[ i ]; } template< typename Vector1, typename Vector2 > void cudaVectorSaxpsbzpy( Vector1& y, const Vector2& x, const typename Vector1 :: RealType& alpha, const Vector2& z, const typename Vector1 :: RealType& beta ) { typedef typename Vector1 :: RealType Real; typedef typename Vector1 :: IndexType Index; #ifdef HAVE_CUDA dim3 blockSize, gridSize; blockSize. x = 512; gridSize. x = x. getSize() / 512 + 1; tnlVectorCUDASaxpsbzpyKernel<<< gridSize, blockSize >>>( y. getSize(), alpha, x. getData(), beta, z. getData() ); #else cerr << "I am sorry but CUDA support is missing on this system " << __FILE__ << " line " << __LINE__ << "." << endl; #endif } template< typename Vector1, typename Vector2 > void vectorSaxpsbzpy( Vector1& y, const Vector2& x, const typename Vector1 :: RealType& alpha, const Vector2& z, const typename Vector1 :: RealType& beta ) { typedef typename Vector1 :: DeviceType Device1; typedef typename Vector2 :: DeviceType Device2; tnlAssert( y. getSize() > 0, cerr << "Vector name is " << v1. getName() ); tnlAssert( y. getSize() == x. getSize(), cerr << "Vector names are " << x. getName() << " and " << y. getName() ); tnlAssert( Device1 :: getDevice() == Device2 :: getDevice(), cerr << "Vector names are " << y. getName() << " and " << x. getName() ); switch( Device1 :: getDevice() ) { case tnlHostDevice: return hostVectorSaxpsbzpy( y, x, alpha, z, beta ); break; case tnlCudaDevice: return cudaVectorSaxpsbzpy( y, x, alpha, z, beta ); break; } } #endif /* VECTOROPERATIONS_H_ */
src/core/tnlSharedVector.h +22 −0 Original line number Diff line number Diff line Loading @@ -103,6 +103,28 @@ class tnlSharedVector : public tnlSharedArray< Real, Device, Index > void saxpsby( const Real& alpha, const Vector& x, const Real& beta ); //! Computes Y = Scalar Alpha X Plus Scalar Beta Z /*!** * It is not standard BLAS function as well. */ template< typename Vector > void saxpsbz( const Real& alpha, const Vector& x, const Real& beta, const Vector& z ); //! Computes Y = Scalar Alpha X Plus Scalar Beta Z Plus Y /*!** * It is not standard BLAS function as well. */ template< typename Vector > void saxpsbzpy( const Real& alpha, const Vector& x, const Real& beta, const Vector& z ); }; #include <core/implementation/tnlSharedVector_impl.h> Loading
src/core/tnlVector.h +20 −0 Original line number Diff line number Diff line Loading @@ -109,6 +109,26 @@ class tnlVector : public tnlArray< Real, Device, Index > void saxpsby( const Real& alpha, const Vector& x, const Real& beta ); //! Computes Y = Scalar Alpha X Plus Scalar Beta Z /*!** * It is not standard BLAS function as well. */ template< typename Vector > void saxpsbz( const Real& alpha, const Vector& x, const Real& beta, const Vector& z ); //! Computes Y = Scalar Alpha X Plus Scalar Beta Z Plus Y /*!** * It is not standard BLAS function as well. */ template< typename Vector > void saxpsbzpy( const Real& alpha, const Vector& x, const Real& beta, const Vector& z ); }; #include <core/implementation/tnlVector_impl.h> Loading