Loading src/Python/pytnl/tnl/Vector.h +2 −2 Original line number Diff line number Diff line Loading @@ -46,7 +46,7 @@ void export_Vector(py::module & m, const char* name) .def("differenceLpNorm", &VectorType::template differenceLpNorm<double, VectorType, double>) .def("differenceSum", &VectorType::template differenceSum<double, VectorType>) .def("scalarProduct", &VectorType::template scalarProduct<VectorType>) .def("addVector", &VectorType::template addVector<VectorType>) .def("addVectors", &VectorType::template addVectors<VectorType>) .def("addVector", &VectorType::template addVector<VectorType, double, double>) .def("addVectors", &VectorType::template addVectors<VectorType, VectorType, double, double, double>) ; } src/TNL/Containers/Algorithms/VectorOperations.h +45 −51 Original line number Diff line number Diff line Loading @@ -30,11 +30,11 @@ public: const typename Vector::IndexType i, const typename Vector::RealType& value ); template< typename Vector > template< typename Vector, typename Scalar > static void addElement( Vector& v, const typename Vector::IndexType i, const typename Vector::RealType& value, const typename Vector::RealType& thisElementMultiplicator ); const Scalar thisElementMultiplicator ); template< typename Vector, typename ResultType = typename Vector::RealType > static ResultType getVectorMax( const Vector& v ); Loading @@ -54,9 +54,8 @@ public: template< typename Vector, typename ResultType = typename Vector::RealType > static ResultType getVectorL2Norm( const Vector& v ); template< typename Vector, typename ResultType = typename Vector::RealType, typename Real_ > static ResultType getVectorLpNorm( const Vector& v, const Real_ p ); template< typename Vector, typename ResultType = typename Vector::RealType, typename Scalar > static ResultType getVectorLpNorm( const Vector& v, const Scalar p ); template< typename Vector, typename ResultType = typename Vector::RealType > static ResultType getVectorSum( const Vector& v ); Loading @@ -79,32 +78,31 @@ public: template< typename Vector1, typename Vector2, typename ResultType = typename Vector1::RealType > static ResultType getVectorDifferenceL2Norm( const Vector1& v1, const Vector2& v2 ); template< typename Vector1, typename Vector2, typename ResultType = typename Vector1::RealType, typename Real_ > static ResultType getVectorDifferenceLpNorm( const Vector1& v1, const Vector2& v2, const Real_ p ); template< typename Vector1, typename Vector2, typename ResultType = typename Vector1::RealType, typename Scalar > static ResultType getVectorDifferenceLpNorm( const Vector1& v1, const Vector2& v2, const Scalar p ); template< typename Vector1, typename Vector2, typename ResultType = typename Vector1::RealType > static ResultType getVectorDifferenceSum( const Vector1& v1, const Vector2& v2 ); template< typename Vector > static void vectorScalarMultiplication( Vector& v, const typename Vector::RealType& alpha ); template< typename Vector, typename Scalar > static void vectorScalarMultiplication( Vector& v, Scalar alpha ); template< typename Vector1, typename Vector2, typename ResultType = typename Vector1::RealType > static ResultType getScalarProduct( const Vector1& v1, const Vector2& v2 ); template< typename Vector1, typename Vector2 > template< typename Vector1, typename Vector2, typename Scalar1, typename Scalar2 > static void addVector( Vector1& y, const Vector2& x, const typename Vector2::RealType& alpha, const typename Vector1::RealType& thisMultiplicator = 1.0 ); const Scalar1 alpha, const Scalar2 thisMultiplicator = 1.0 ); template< typename Vector1, typename Vector2, typename Vector3 > template< typename Vector1, typename Vector2, typename Vector3, typename Scalar1, typename Scalar2, typename Scalar3 > static void addVectors( Vector1& v, const Vector2& v1, const typename Vector2::RealType& multiplicator1, const Scalar1 multiplicator1, const Vector3& v2, const typename Vector3::RealType& multiplicator2, const typename Vector1::RealType& thisMultiplicator = 1.0 ); const Scalar2 multiplicator2, const Scalar3 thisMultiplicator = 1.0 ); template< typename Vector > static void computePrefixSum( Vector& v, Loading @@ -126,11 +124,11 @@ public: const typename Vector::IndexType i, const typename Vector::RealType& value ); template< typename Vector > template< typename Vector, typename Scalar > static void addElement( Vector& v, const typename Vector::IndexType i, const typename Vector::RealType& value, const typename Vector::RealType& thisElementMultiplicator ); const Scalar thisElementMultiplicator ); template< typename Vector, typename ResultType = typename Vector::RealType > static ResultType getVectorMax( const Vector& v ); Loading @@ -150,9 +148,8 @@ public: template< typename Vector, typename ResultType = typename Vector::RealType > static ResultType getVectorL2Norm( const Vector& v ); template< typename Vector, typename ResultType = typename Vector::RealType, typename Real_ > static ResultType getVectorLpNorm( const Vector& v, const Real_ p ); template< typename Vector, typename ResultType = typename Vector::RealType, typename Scalar > static ResultType getVectorLpNorm( const Vector& v, const Scalar p ); template< typename Vector, typename ResultType = typename Vector::RealType > static ResultType getVectorSum( const Vector& v ); Loading @@ -175,32 +172,31 @@ public: template< typename Vector1, typename Vector2, typename ResultType = typename Vector1::RealType > static ResultType getVectorDifferenceL2Norm( const Vector1& v1, const Vector2& v2 ); template< typename Vector1, typename Vector2, typename ResultType = typename Vector1::RealType, typename Real_ > static ResultType getVectorDifferenceLpNorm( const Vector1& v1, const Vector2& v2, const Real_ p ); template< typename Vector1, typename Vector2, typename ResultType = typename Vector1::RealType, typename Scalar > static ResultType getVectorDifferenceLpNorm( const Vector1& v1, const Vector2& v2, const Scalar p ); template< typename Vector1, typename Vector2, typename ResultType = typename Vector1::RealType > static ResultType getVectorDifferenceSum( const Vector1& v1, const Vector2& v2 ); template< typename Vector > static void vectorScalarMultiplication( Vector& v, const typename Vector::RealType& alpha ); template< typename Vector, typename Scalar > static void vectorScalarMultiplication( Vector& v, const Scalar alpha ); template< typename Vector1, typename Vector2, typename ResultType = typename Vector1::RealType > static ResultType getScalarProduct( const Vector1& v1, const Vector2& v2 ); template< typename Vector1, typename Vector2 > template< typename Vector1, typename Vector2, typename Scalar1, typename Scalar2 > static void addVector( Vector1& y, const Vector2& x, const typename Vector2::RealType& alpha, const typename Vector1::RealType& thisMultiplicator = 1.0 ); const Scalar1 alpha, const Scalar2 thisMultiplicator = 1.0 ); template< typename Vector1, typename Vector2, typename Vector3 > template< typename Vector1, typename Vector2, typename Vector3, typename Scalar1, typename Scalar2, typename Scalar3 > static void addVectors( Vector1& v, const Vector2& v1, const typename Vector2::RealType& multiplicator1, const Scalar1 multiplicator1, const Vector3& v2, const typename Vector3::RealType& multiplicator2, const typename Vector1::RealType& thisMultiplicator = 1.0 ); const Scalar2 multiplicator2, const Scalar3 thisMultiplicator = 1.0 ); template< typename Vector > static void computePrefixSum( Vector& v, Loading @@ -223,11 +219,11 @@ public: const typename Vector::IndexType i, const typename Vector::RealType& value ); template< typename Vector > template< typename Vector, typename Scalar > static void addElement( Vector& v, const typename Vector::IndexType i, const typename Vector::RealType& value, const typename Vector::RealType& thisElementMultiplicator ); const Scalar thisElementMultiplicator ); template< typename Vector, typename ResultType = typename Vector::RealType > static ResultType getVectorMax( const Vector& v ); Loading @@ -247,9 +243,8 @@ public: template< typename Vector, typename ResultType = typename Vector::RealType > static ResultType getVectorL2Norm( const Vector& v ); template< typename Vector, typename ResultType = typename Vector::RealType, typename Real_ > static ResultType getVectorLpNorm( const Vector& v, const Real_ p ); template< typename Vector, typename ResultType = typename Vector::RealType, typename Scalar > static ResultType getVectorLpNorm( const Vector& v, const Scalar p ); template< typename Vector, typename ResultType = typename Vector::RealType > static ResultType getVectorSum( const Vector& v ); Loading @@ -272,32 +267,31 @@ public: template< typename Vector1, typename Vector2, typename ResultType = typename Vector1::RealType > static ResultType getVectorDifferenceL2Norm( const Vector1& v1, const Vector2& v2 ); template< typename Vector1, typename Vector2, typename ResultType = typename Vector1::RealType, typename Real_ > static ResultType getVectorDifferenceLpNorm( const Vector1& v1, const Vector2& v2, const Real_ p ); template< typename Vector1, typename Vector2, typename ResultType = typename Vector1::RealType, typename Scalar > static ResultType getVectorDifferenceLpNorm( const Vector1& v1, const Vector2& v2, const Scalar p ); template< typename Vector1, typename Vector2, typename ResultType = typename Vector1::RealType > static ResultType getVectorDifferenceSum( const Vector1& v1, const Vector2& v2 ); template< typename Vector > static void vectorScalarMultiplication( Vector& v, const typename Vector::RealType& alpha ); template< typename Vector, typename Scalar > static void vectorScalarMultiplication( Vector& v, const Scalar alpha ); template< typename Vector1, typename Vector2, typename ResultType = typename Vector1::RealType > static ResultType getScalarProduct( const Vector1& v1, const Vector2& v2 ); template< typename Vector1, typename Vector2 > template< typename Vector1, typename Vector2, typename Scalar1, typename Scalar2 > static void addVector( Vector1& y, const Vector2& x, const typename Vector2::RealType& alpha, const typename Vector1::RealType& thisMultiplicator = 1.0 ); const Scalar1 alpha, const Scalar2 thisMultiplicator = 1.0 ); template< typename Vector1, typename Vector2, typename Vector3 > template< typename Vector1, typename Vector2, typename Vector3, typename Scalar1, typename Scalar2, typename Scalar3 > static void addVectors( Vector1& v, const Vector2& v1, const typename Vector2::RealType& multiplicator1, const Scalar1 multiplicator1, const Vector3& v2, const typename Vector3::RealType& multiplicator2, const typename Vector1::RealType& thisMultiplicator = 1.0 ); const Scalar2 multiplicator2, const Scalar3 thisMultiplicator = 1.0 ); template< typename Vector > static void computePrefixSum( Vector& v, Loading src/TNL/Containers/Algorithms/VectorOperationsCuda_impl.h +33 −35 Original line number Diff line number Diff line Loading @@ -29,13 +29,13 @@ addElement( Vector& v, v[ i ] += value; } template< typename Vector > template< typename Vector, typename Scalar > void VectorOperations< Devices::Cuda >:: addElement( Vector& v, const typename Vector::IndexType i, const typename Vector::RealType& value, const typename Vector::RealType& thisElementMultiplicator ) const Scalar thisElementMultiplicator ) { v[ i ] = thisElementMultiplicator * v[ i ] + value; } Loading Loading @@ -137,11 +137,11 @@ getVectorL2Norm( const Vector& v ) return std::sqrt( result ); } template< typename Vector, typename ResultType, typename Real_ > template< typename Vector, typename ResultType, typename Scalar > ResultType VectorOperations< Devices::Cuda >:: getVectorLpNorm( const Vector& v, const Real_ p ) const Scalar p ) { typedef typename Vector::RealType Real; Loading @@ -153,7 +153,7 @@ getVectorLpNorm( const Vector& v, if( p == 2 ) return getVectorL2Norm< Vector, ResultType >( v ); Algorithms::ParallelReductionLpNorm< Real, ResultType, Real_ > operation; Algorithms::ParallelReductionLpNorm< Real, ResultType, Scalar > operation; operation.setPower( p ); const ResultType result = Reduction< Devices::Cuda >::reduce( operation, v.getSize(), Loading Loading @@ -276,12 +276,12 @@ getVectorDifferenceL2Norm( const Vector1& v1, return std::sqrt( result ); } template< typename Vector1, typename Vector2, typename ResultType, typename Real_ > template< typename Vector1, typename Vector2, typename ResultType, typename Scalar > ResultType VectorOperations< Devices::Cuda >:: getVectorDifferenceLpNorm( const Vector1& v1, const Vector2& v2, const Real_ p ) const Scalar p ) { TNL_ASSERT_GT( v1.getSize(), 0, "Vector size must be positive." ); TNL_ASSERT_EQ( v1.getSize(), v2.getSize(), "The vector sizes must be the same." ); Loading @@ -292,7 +292,7 @@ getVectorDifferenceLpNorm( const Vector1& v1, if( p == 2.0 ) return getVectorDifferenceL2Norm< Vector1, Vector2, ResultType >( v1, v2 ); Algorithms::ParallelReductionDiffLpNorm< typename Vector1::RealType, typename Vector2::RealType, ResultType, Real_ > operation; Algorithms::ParallelReductionDiffLpNorm< typename Vector1::RealType, typename Vector2::RealType, ResultType, Scalar > operation; operation.setPower( p ); const ResultType result = Reduction< Devices::Cuda >::reduce( operation, v1.getSize(), Loading @@ -318,11 +318,11 @@ getVectorDifferenceSum( const Vector1& v1, } #ifdef HAVE_CUDA template< typename Real, typename Index > template< typename Real, typename Index, typename Scalar > __global__ void vectorScalarMultiplicationCudaKernel( Real* data, Index size, Real alpha ) Scalar alpha ) { Index elementIdx = blockDim.x * blockIdx.x + threadIdx.x; const Index maxGridSize = blockDim.x * gridDim.x; Loading @@ -334,11 +334,11 @@ vectorScalarMultiplicationCudaKernel( Real* data, } #endif template< typename Vector > template< typename Vector, typename Scalar > void VectorOperations< Devices::Cuda >:: vectorScalarMultiplication( Vector& v, const typename Vector::RealType& alpha ) const Scalar alpha ) { TNL_ASSERT_GT( v.getSize(), 0, "Vector size must be positive." ); Loading Loading @@ -376,14 +376,13 @@ getScalarProduct( const Vector1& v1, } #ifdef HAVE_CUDA template< typename Real, typename Index > template< typename Real1, typename Real2, typename Index, typename Scalar1, typename Scalar2 > __global__ void vectorAddVectorCudaKernel( Real* y, const Real* x, vectorAddVectorCudaKernel( Real1* y, const Real2* x, const Index size, const Real alpha, const Real thisMultiplicator ) const Scalar1 alpha, const Scalar2 thisMultiplicator ) { Index elementIdx = blockDim.x * blockIdx.x + threadIdx.x; const Index maxGridSize = blockDim.x * gridDim.x; Loading @@ -402,13 +401,13 @@ vectorAddVectorCudaKernel( Real* y, } #endif template< typename Vector1, typename Vector2 > template< typename Vector1, typename Vector2, typename Scalar1, typename Scalar2 > void VectorOperations< Devices::Cuda >:: addVector( Vector1& y, const Vector2& x, const typename Vector2::RealType& alpha, const typename Vector1::RealType& thisMultiplicator ) const Scalar1 alpha, const Scalar2 thisMultiplicator ) { TNL_ASSERT_GT( x.getSize(), 0, "Vector size must be positive." ); TNL_ASSERT_EQ( x.getSize(), y.getSize(), "The vector sizes must be the same." ); Loading @@ -435,16 +434,16 @@ addVector( Vector1& y, } #ifdef HAVE_CUDA template< typename Real, typename Index > template< typename Real1, typename Real2, typename Real3, typename Index, typename Scalar1, typename Scalar2, typename Scalar3 > __global__ void vectorAddVectorsCudaKernel( Real* v, const Real* v1, const Real* v2, vectorAddVectorsCudaKernel( Real1* v, const Real2* v1, const Real3* v2, const Index size, const Real multiplicator1, const Real multiplicator2, const Real thisMultiplicator ) const Scalar1 multiplicator1, const Scalar2 multiplicator2, const Scalar3 thisMultiplicator ) { Index elementIdx = blockDim.x * blockIdx.x + threadIdx.x; const Index maxGridSize = blockDim.x * gridDim.x; Loading @@ -466,17 +465,16 @@ vectorAddVectorsCudaKernel( Real* v, } #endif template< typename Vector1, typename Vector2, typename Vector3 > template< typename Vector1, typename Vector2, typename Vector3, typename Scalar1, typename Scalar2, typename Scalar3 > void VectorOperations< Devices::Cuda >:: addVectors( Vector1& v, const Vector2& v1, const typename Vector2::RealType& multiplicator1, const Scalar1 multiplicator1, const Vector3& v2, const typename Vector3::RealType& multiplicator2, const typename Vector1::RealType& thisMultiplicator ) const Scalar2 multiplicator2, const Scalar3 thisMultiplicator ) { TNL_ASSERT_GT( v.getSize(), 0, "Vector size must be positive." ); TNL_ASSERT_EQ( v.getSize(), v1.getSize(), "The vector sizes must be the same." ); Loading src/TNL/Containers/Algorithms/VectorOperationsHost_impl.h +19 −20 Original line number Diff line number Diff line Loading @@ -30,13 +30,13 @@ addElement( Vector& v, v[ i ] += value; } template< typename Vector > template< typename Vector, typename Scalar > void VectorOperations< Devices::Host >:: addElement( Vector& v, const typename Vector::IndexType i, const typename Vector::RealType& value, const typename Vector::RealType& thisElementMultiplicator ) const Scalar thisElementMultiplicator ) { v[ i ] = thisElementMultiplicator * v[ i ] + value; } Loading Loading @@ -139,11 +139,11 @@ getVectorL2Norm( const Vector& v ) return std::sqrt( result ); } template< typename Vector, typename ResultType, typename Real_ > template< typename Vector, typename ResultType, typename Scalar > ResultType VectorOperations< Devices::Host >:: getVectorLpNorm( const Vector& v, const Real_ p ) const Scalar p ) { typedef typename Vector::RealType Real; Loading @@ -155,7 +155,7 @@ getVectorLpNorm( const Vector& v, if( p == 2.0 ) return getVectorL2Norm< Vector, ResultType >( v ); Algorithms::ParallelReductionLpNorm< Real, ResultType, Real_ > operation; Algorithms::ParallelReductionLpNorm< Real, ResultType, Scalar > operation; operation.setPower( p ); const ResultType result = Reduction< Devices::Host >::reduce( operation, v.getSize(), Loading Loading @@ -278,12 +278,12 @@ getVectorDifferenceL2Norm( const Vector1& v1, } template< typename Vector1, typename Vector2, typename ResultType, typename Real_ > template< typename Vector1, typename Vector2, typename ResultType, typename Scalar > ResultType VectorOperations< Devices::Host >:: getVectorDifferenceLpNorm( const Vector1& v1, const Vector2& v2, const Real_ p ) const Scalar p ) { TNL_ASSERT_GT( v1.getSize(), 0, "Vector size must be positive." ); TNL_ASSERT_EQ( v1.getSize(), v2.getSize(), "The vector sizes must be the same." ); Loading @@ -294,7 +294,7 @@ getVectorDifferenceLpNorm( const Vector1& v1, if( p == 2.0 ) return getVectorDifferenceL2Norm< Vector1, Vector2, ResultType >( v1, v2 ); Algorithms::ParallelReductionDiffLpNorm< typename Vector1::RealType, typename Vector2::RealType, ResultType, Real_ > operation; Algorithms::ParallelReductionDiffLpNorm< typename Vector1::RealType, typename Vector2::RealType, ResultType, Scalar > operation; operation.setPower( p ); const ResultType result = Reduction< Devices::Host >::reduce( operation, v1.getSize(), Loading @@ -320,11 +320,11 @@ getVectorDifferenceSum( const Vector1& v1, } template< typename Vector > template< typename Vector, typename Scalar > void VectorOperations< Devices::Host >:: vectorScalarMultiplication( Vector& v, const typename Vector::RealType& alpha ) const Scalar alpha ) { typedef typename Vector::IndexType Index; Loading Loading @@ -355,13 +355,13 @@ getScalarProduct( const Vector1& v1, v2.getData() ); } template< typename Vector1, typename Vector2 > template< typename Vector1, typename Vector2, typename Scalar1, typename Scalar2 > void VectorOperations< Devices::Host >:: addVector( Vector1& y, const Vector2& x, const typename Vector2::RealType& alpha, const typename Vector1::RealType& thisMultiplicator ) const Scalar1 alpha, const Scalar2 thisMultiplicator ) { typedef typename Vector1::IndexType Index; Loading @@ -384,17 +384,16 @@ addVector( Vector1& y, y[ i ] = thisMultiplicator * y[ i ] + alpha * x[ i ]; } template< typename Vector1, typename Vector2, typename Vector3 > template< typename Vector1, typename Vector2, typename Vector3, typename Scalar1, typename Scalar2, typename Scalar3 > void VectorOperations< Devices::Host >:: addVectors( Vector1& v, const Vector2& v1, const typename Vector2::RealType& multiplicator1, const Scalar1 multiplicator1, const Vector3& v2, const typename Vector3::RealType& multiplicator2, const typename Vector1::RealType& thisMultiplicator ) const Scalar2 multiplicator2, const Scalar3 thisMultiplicator ) { typedef typename Vector1::IndexType Index; Loading src/TNL/Containers/Algorithms/VectorOperationsMIC_impl.h +22 −24 File changed.Preview size limit exceeded, changes collapsed. Show changes Loading
src/Python/pytnl/tnl/Vector.h +2 −2 Original line number Diff line number Diff line Loading @@ -46,7 +46,7 @@ void export_Vector(py::module & m, const char* name) .def("differenceLpNorm", &VectorType::template differenceLpNorm<double, VectorType, double>) .def("differenceSum", &VectorType::template differenceSum<double, VectorType>) .def("scalarProduct", &VectorType::template scalarProduct<VectorType>) .def("addVector", &VectorType::template addVector<VectorType>) .def("addVectors", &VectorType::template addVectors<VectorType>) .def("addVector", &VectorType::template addVector<VectorType, double, double>) .def("addVectors", &VectorType::template addVectors<VectorType, VectorType, double, double, double>) ; }
src/TNL/Containers/Algorithms/VectorOperations.h +45 −51 Original line number Diff line number Diff line Loading @@ -30,11 +30,11 @@ public: const typename Vector::IndexType i, const typename Vector::RealType& value ); template< typename Vector > template< typename Vector, typename Scalar > static void addElement( Vector& v, const typename Vector::IndexType i, const typename Vector::RealType& value, const typename Vector::RealType& thisElementMultiplicator ); const Scalar thisElementMultiplicator ); template< typename Vector, typename ResultType = typename Vector::RealType > static ResultType getVectorMax( const Vector& v ); Loading @@ -54,9 +54,8 @@ public: template< typename Vector, typename ResultType = typename Vector::RealType > static ResultType getVectorL2Norm( const Vector& v ); template< typename Vector, typename ResultType = typename Vector::RealType, typename Real_ > static ResultType getVectorLpNorm( const Vector& v, const Real_ p ); template< typename Vector, typename ResultType = typename Vector::RealType, typename Scalar > static ResultType getVectorLpNorm( const Vector& v, const Scalar p ); template< typename Vector, typename ResultType = typename Vector::RealType > static ResultType getVectorSum( const Vector& v ); Loading @@ -79,32 +78,31 @@ public: template< typename Vector1, typename Vector2, typename ResultType = typename Vector1::RealType > static ResultType getVectorDifferenceL2Norm( const Vector1& v1, const Vector2& v2 ); template< typename Vector1, typename Vector2, typename ResultType = typename Vector1::RealType, typename Real_ > static ResultType getVectorDifferenceLpNorm( const Vector1& v1, const Vector2& v2, const Real_ p ); template< typename Vector1, typename Vector2, typename ResultType = typename Vector1::RealType, typename Scalar > static ResultType getVectorDifferenceLpNorm( const Vector1& v1, const Vector2& v2, const Scalar p ); template< typename Vector1, typename Vector2, typename ResultType = typename Vector1::RealType > static ResultType getVectorDifferenceSum( const Vector1& v1, const Vector2& v2 ); template< typename Vector > static void vectorScalarMultiplication( Vector& v, const typename Vector::RealType& alpha ); template< typename Vector, typename Scalar > static void vectorScalarMultiplication( Vector& v, Scalar alpha ); template< typename Vector1, typename Vector2, typename ResultType = typename Vector1::RealType > static ResultType getScalarProduct( const Vector1& v1, const Vector2& v2 ); template< typename Vector1, typename Vector2 > template< typename Vector1, typename Vector2, typename Scalar1, typename Scalar2 > static void addVector( Vector1& y, const Vector2& x, const typename Vector2::RealType& alpha, const typename Vector1::RealType& thisMultiplicator = 1.0 ); const Scalar1 alpha, const Scalar2 thisMultiplicator = 1.0 ); template< typename Vector1, typename Vector2, typename Vector3 > template< typename Vector1, typename Vector2, typename Vector3, typename Scalar1, typename Scalar2, typename Scalar3 > static void addVectors( Vector1& v, const Vector2& v1, const typename Vector2::RealType& multiplicator1, const Scalar1 multiplicator1, const Vector3& v2, const typename Vector3::RealType& multiplicator2, const typename Vector1::RealType& thisMultiplicator = 1.0 ); const Scalar2 multiplicator2, const Scalar3 thisMultiplicator = 1.0 ); template< typename Vector > static void computePrefixSum( Vector& v, Loading @@ -126,11 +124,11 @@ public: const typename Vector::IndexType i, const typename Vector::RealType& value ); template< typename Vector > template< typename Vector, typename Scalar > static void addElement( Vector& v, const typename Vector::IndexType i, const typename Vector::RealType& value, const typename Vector::RealType& thisElementMultiplicator ); const Scalar thisElementMultiplicator ); template< typename Vector, typename ResultType = typename Vector::RealType > static ResultType getVectorMax( const Vector& v ); Loading @@ -150,9 +148,8 @@ public: template< typename Vector, typename ResultType = typename Vector::RealType > static ResultType getVectorL2Norm( const Vector& v ); template< typename Vector, typename ResultType = typename Vector::RealType, typename Real_ > static ResultType getVectorLpNorm( const Vector& v, const Real_ p ); template< typename Vector, typename ResultType = typename Vector::RealType, typename Scalar > static ResultType getVectorLpNorm( const Vector& v, const Scalar p ); template< typename Vector, typename ResultType = typename Vector::RealType > static ResultType getVectorSum( const Vector& v ); Loading @@ -175,32 +172,31 @@ public: template< typename Vector1, typename Vector2, typename ResultType = typename Vector1::RealType > static ResultType getVectorDifferenceL2Norm( const Vector1& v1, const Vector2& v2 ); template< typename Vector1, typename Vector2, typename ResultType = typename Vector1::RealType, typename Real_ > static ResultType getVectorDifferenceLpNorm( const Vector1& v1, const Vector2& v2, const Real_ p ); template< typename Vector1, typename Vector2, typename ResultType = typename Vector1::RealType, typename Scalar > static ResultType getVectorDifferenceLpNorm( const Vector1& v1, const Vector2& v2, const Scalar p ); template< typename Vector1, typename Vector2, typename ResultType = typename Vector1::RealType > static ResultType getVectorDifferenceSum( const Vector1& v1, const Vector2& v2 ); template< typename Vector > static void vectorScalarMultiplication( Vector& v, const typename Vector::RealType& alpha ); template< typename Vector, typename Scalar > static void vectorScalarMultiplication( Vector& v, const Scalar alpha ); template< typename Vector1, typename Vector2, typename ResultType = typename Vector1::RealType > static ResultType getScalarProduct( const Vector1& v1, const Vector2& v2 ); template< typename Vector1, typename Vector2 > template< typename Vector1, typename Vector2, typename Scalar1, typename Scalar2 > static void addVector( Vector1& y, const Vector2& x, const typename Vector2::RealType& alpha, const typename Vector1::RealType& thisMultiplicator = 1.0 ); const Scalar1 alpha, const Scalar2 thisMultiplicator = 1.0 ); template< typename Vector1, typename Vector2, typename Vector3 > template< typename Vector1, typename Vector2, typename Vector3, typename Scalar1, typename Scalar2, typename Scalar3 > static void addVectors( Vector1& v, const Vector2& v1, const typename Vector2::RealType& multiplicator1, const Scalar1 multiplicator1, const Vector3& v2, const typename Vector3::RealType& multiplicator2, const typename Vector1::RealType& thisMultiplicator = 1.0 ); const Scalar2 multiplicator2, const Scalar3 thisMultiplicator = 1.0 ); template< typename Vector > static void computePrefixSum( Vector& v, Loading @@ -223,11 +219,11 @@ public: const typename Vector::IndexType i, const typename Vector::RealType& value ); template< typename Vector > template< typename Vector, typename Scalar > static void addElement( Vector& v, const typename Vector::IndexType i, const typename Vector::RealType& value, const typename Vector::RealType& thisElementMultiplicator ); const Scalar thisElementMultiplicator ); template< typename Vector, typename ResultType = typename Vector::RealType > static ResultType getVectorMax( const Vector& v ); Loading @@ -247,9 +243,8 @@ public: template< typename Vector, typename ResultType = typename Vector::RealType > static ResultType getVectorL2Norm( const Vector& v ); template< typename Vector, typename ResultType = typename Vector::RealType, typename Real_ > static ResultType getVectorLpNorm( const Vector& v, const Real_ p ); template< typename Vector, typename ResultType = typename Vector::RealType, typename Scalar > static ResultType getVectorLpNorm( const Vector& v, const Scalar p ); template< typename Vector, typename ResultType = typename Vector::RealType > static ResultType getVectorSum( const Vector& v ); Loading @@ -272,32 +267,31 @@ public: template< typename Vector1, typename Vector2, typename ResultType = typename Vector1::RealType > static ResultType getVectorDifferenceL2Norm( const Vector1& v1, const Vector2& v2 ); template< typename Vector1, typename Vector2, typename ResultType = typename Vector1::RealType, typename Real_ > static ResultType getVectorDifferenceLpNorm( const Vector1& v1, const Vector2& v2, const Real_ p ); template< typename Vector1, typename Vector2, typename ResultType = typename Vector1::RealType, typename Scalar > static ResultType getVectorDifferenceLpNorm( const Vector1& v1, const Vector2& v2, const Scalar p ); template< typename Vector1, typename Vector2, typename ResultType = typename Vector1::RealType > static ResultType getVectorDifferenceSum( const Vector1& v1, const Vector2& v2 ); template< typename Vector > static void vectorScalarMultiplication( Vector& v, const typename Vector::RealType& alpha ); template< typename Vector, typename Scalar > static void vectorScalarMultiplication( Vector& v, const Scalar alpha ); template< typename Vector1, typename Vector2, typename ResultType = typename Vector1::RealType > static ResultType getScalarProduct( const Vector1& v1, const Vector2& v2 ); template< typename Vector1, typename Vector2 > template< typename Vector1, typename Vector2, typename Scalar1, typename Scalar2 > static void addVector( Vector1& y, const Vector2& x, const typename Vector2::RealType& alpha, const typename Vector1::RealType& thisMultiplicator = 1.0 ); const Scalar1 alpha, const Scalar2 thisMultiplicator = 1.0 ); template< typename Vector1, typename Vector2, typename Vector3 > template< typename Vector1, typename Vector2, typename Vector3, typename Scalar1, typename Scalar2, typename Scalar3 > static void addVectors( Vector1& v, const Vector2& v1, const typename Vector2::RealType& multiplicator1, const Scalar1 multiplicator1, const Vector3& v2, const typename Vector3::RealType& multiplicator2, const typename Vector1::RealType& thisMultiplicator = 1.0 ); const Scalar2 multiplicator2, const Scalar3 thisMultiplicator = 1.0 ); template< typename Vector > static void computePrefixSum( Vector& v, Loading
src/TNL/Containers/Algorithms/VectorOperationsCuda_impl.h +33 −35 Original line number Diff line number Diff line Loading @@ -29,13 +29,13 @@ addElement( Vector& v, v[ i ] += value; } template< typename Vector > template< typename Vector, typename Scalar > void VectorOperations< Devices::Cuda >:: addElement( Vector& v, const typename Vector::IndexType i, const typename Vector::RealType& value, const typename Vector::RealType& thisElementMultiplicator ) const Scalar thisElementMultiplicator ) { v[ i ] = thisElementMultiplicator * v[ i ] + value; } Loading Loading @@ -137,11 +137,11 @@ getVectorL2Norm( const Vector& v ) return std::sqrt( result ); } template< typename Vector, typename ResultType, typename Real_ > template< typename Vector, typename ResultType, typename Scalar > ResultType VectorOperations< Devices::Cuda >:: getVectorLpNorm( const Vector& v, const Real_ p ) const Scalar p ) { typedef typename Vector::RealType Real; Loading @@ -153,7 +153,7 @@ getVectorLpNorm( const Vector& v, if( p == 2 ) return getVectorL2Norm< Vector, ResultType >( v ); Algorithms::ParallelReductionLpNorm< Real, ResultType, Real_ > operation; Algorithms::ParallelReductionLpNorm< Real, ResultType, Scalar > operation; operation.setPower( p ); const ResultType result = Reduction< Devices::Cuda >::reduce( operation, v.getSize(), Loading Loading @@ -276,12 +276,12 @@ getVectorDifferenceL2Norm( const Vector1& v1, return std::sqrt( result ); } template< typename Vector1, typename Vector2, typename ResultType, typename Real_ > template< typename Vector1, typename Vector2, typename ResultType, typename Scalar > ResultType VectorOperations< Devices::Cuda >:: getVectorDifferenceLpNorm( const Vector1& v1, const Vector2& v2, const Real_ p ) const Scalar p ) { TNL_ASSERT_GT( v1.getSize(), 0, "Vector size must be positive." ); TNL_ASSERT_EQ( v1.getSize(), v2.getSize(), "The vector sizes must be the same." ); Loading @@ -292,7 +292,7 @@ getVectorDifferenceLpNorm( const Vector1& v1, if( p == 2.0 ) return getVectorDifferenceL2Norm< Vector1, Vector2, ResultType >( v1, v2 ); Algorithms::ParallelReductionDiffLpNorm< typename Vector1::RealType, typename Vector2::RealType, ResultType, Real_ > operation; Algorithms::ParallelReductionDiffLpNorm< typename Vector1::RealType, typename Vector2::RealType, ResultType, Scalar > operation; operation.setPower( p ); const ResultType result = Reduction< Devices::Cuda >::reduce( operation, v1.getSize(), Loading @@ -318,11 +318,11 @@ getVectorDifferenceSum( const Vector1& v1, } #ifdef HAVE_CUDA template< typename Real, typename Index > template< typename Real, typename Index, typename Scalar > __global__ void vectorScalarMultiplicationCudaKernel( Real* data, Index size, Real alpha ) Scalar alpha ) { Index elementIdx = blockDim.x * blockIdx.x + threadIdx.x; const Index maxGridSize = blockDim.x * gridDim.x; Loading @@ -334,11 +334,11 @@ vectorScalarMultiplicationCudaKernel( Real* data, } #endif template< typename Vector > template< typename Vector, typename Scalar > void VectorOperations< Devices::Cuda >:: vectorScalarMultiplication( Vector& v, const typename Vector::RealType& alpha ) const Scalar alpha ) { TNL_ASSERT_GT( v.getSize(), 0, "Vector size must be positive." ); Loading Loading @@ -376,14 +376,13 @@ getScalarProduct( const Vector1& v1, } #ifdef HAVE_CUDA template< typename Real, typename Index > template< typename Real1, typename Real2, typename Index, typename Scalar1, typename Scalar2 > __global__ void vectorAddVectorCudaKernel( Real* y, const Real* x, vectorAddVectorCudaKernel( Real1* y, const Real2* x, const Index size, const Real alpha, const Real thisMultiplicator ) const Scalar1 alpha, const Scalar2 thisMultiplicator ) { Index elementIdx = blockDim.x * blockIdx.x + threadIdx.x; const Index maxGridSize = blockDim.x * gridDim.x; Loading @@ -402,13 +401,13 @@ vectorAddVectorCudaKernel( Real* y, } #endif template< typename Vector1, typename Vector2 > template< typename Vector1, typename Vector2, typename Scalar1, typename Scalar2 > void VectorOperations< Devices::Cuda >:: addVector( Vector1& y, const Vector2& x, const typename Vector2::RealType& alpha, const typename Vector1::RealType& thisMultiplicator ) const Scalar1 alpha, const Scalar2 thisMultiplicator ) { TNL_ASSERT_GT( x.getSize(), 0, "Vector size must be positive." ); TNL_ASSERT_EQ( x.getSize(), y.getSize(), "The vector sizes must be the same." ); Loading @@ -435,16 +434,16 @@ addVector( Vector1& y, } #ifdef HAVE_CUDA template< typename Real, typename Index > template< typename Real1, typename Real2, typename Real3, typename Index, typename Scalar1, typename Scalar2, typename Scalar3 > __global__ void vectorAddVectorsCudaKernel( Real* v, const Real* v1, const Real* v2, vectorAddVectorsCudaKernel( Real1* v, const Real2* v1, const Real3* v2, const Index size, const Real multiplicator1, const Real multiplicator2, const Real thisMultiplicator ) const Scalar1 multiplicator1, const Scalar2 multiplicator2, const Scalar3 thisMultiplicator ) { Index elementIdx = blockDim.x * blockIdx.x + threadIdx.x; const Index maxGridSize = blockDim.x * gridDim.x; Loading @@ -466,17 +465,16 @@ vectorAddVectorsCudaKernel( Real* v, } #endif template< typename Vector1, typename Vector2, typename Vector3 > template< typename Vector1, typename Vector2, typename Vector3, typename Scalar1, typename Scalar2, typename Scalar3 > void VectorOperations< Devices::Cuda >:: addVectors( Vector1& v, const Vector2& v1, const typename Vector2::RealType& multiplicator1, const Scalar1 multiplicator1, const Vector3& v2, const typename Vector3::RealType& multiplicator2, const typename Vector1::RealType& thisMultiplicator ) const Scalar2 multiplicator2, const Scalar3 thisMultiplicator ) { TNL_ASSERT_GT( v.getSize(), 0, "Vector size must be positive." ); TNL_ASSERT_EQ( v.getSize(), v1.getSize(), "The vector sizes must be the same." ); Loading
src/TNL/Containers/Algorithms/VectorOperationsHost_impl.h +19 −20 Original line number Diff line number Diff line Loading @@ -30,13 +30,13 @@ addElement( Vector& v, v[ i ] += value; } template< typename Vector > template< typename Vector, typename Scalar > void VectorOperations< Devices::Host >:: addElement( Vector& v, const typename Vector::IndexType i, const typename Vector::RealType& value, const typename Vector::RealType& thisElementMultiplicator ) const Scalar thisElementMultiplicator ) { v[ i ] = thisElementMultiplicator * v[ i ] + value; } Loading Loading @@ -139,11 +139,11 @@ getVectorL2Norm( const Vector& v ) return std::sqrt( result ); } template< typename Vector, typename ResultType, typename Real_ > template< typename Vector, typename ResultType, typename Scalar > ResultType VectorOperations< Devices::Host >:: getVectorLpNorm( const Vector& v, const Real_ p ) const Scalar p ) { typedef typename Vector::RealType Real; Loading @@ -155,7 +155,7 @@ getVectorLpNorm( const Vector& v, if( p == 2.0 ) return getVectorL2Norm< Vector, ResultType >( v ); Algorithms::ParallelReductionLpNorm< Real, ResultType, Real_ > operation; Algorithms::ParallelReductionLpNorm< Real, ResultType, Scalar > operation; operation.setPower( p ); const ResultType result = Reduction< Devices::Host >::reduce( operation, v.getSize(), Loading Loading @@ -278,12 +278,12 @@ getVectorDifferenceL2Norm( const Vector1& v1, } template< typename Vector1, typename Vector2, typename ResultType, typename Real_ > template< typename Vector1, typename Vector2, typename ResultType, typename Scalar > ResultType VectorOperations< Devices::Host >:: getVectorDifferenceLpNorm( const Vector1& v1, const Vector2& v2, const Real_ p ) const Scalar p ) { TNL_ASSERT_GT( v1.getSize(), 0, "Vector size must be positive." ); TNL_ASSERT_EQ( v1.getSize(), v2.getSize(), "The vector sizes must be the same." ); Loading @@ -294,7 +294,7 @@ getVectorDifferenceLpNorm( const Vector1& v1, if( p == 2.0 ) return getVectorDifferenceL2Norm< Vector1, Vector2, ResultType >( v1, v2 ); Algorithms::ParallelReductionDiffLpNorm< typename Vector1::RealType, typename Vector2::RealType, ResultType, Real_ > operation; Algorithms::ParallelReductionDiffLpNorm< typename Vector1::RealType, typename Vector2::RealType, ResultType, Scalar > operation; operation.setPower( p ); const ResultType result = Reduction< Devices::Host >::reduce( operation, v1.getSize(), Loading @@ -320,11 +320,11 @@ getVectorDifferenceSum( const Vector1& v1, } template< typename Vector > template< typename Vector, typename Scalar > void VectorOperations< Devices::Host >:: vectorScalarMultiplication( Vector& v, const typename Vector::RealType& alpha ) const Scalar alpha ) { typedef typename Vector::IndexType Index; Loading Loading @@ -355,13 +355,13 @@ getScalarProduct( const Vector1& v1, v2.getData() ); } template< typename Vector1, typename Vector2 > template< typename Vector1, typename Vector2, typename Scalar1, typename Scalar2 > void VectorOperations< Devices::Host >:: addVector( Vector1& y, const Vector2& x, const typename Vector2::RealType& alpha, const typename Vector1::RealType& thisMultiplicator ) const Scalar1 alpha, const Scalar2 thisMultiplicator ) { typedef typename Vector1::IndexType Index; Loading @@ -384,17 +384,16 @@ addVector( Vector1& y, y[ i ] = thisMultiplicator * y[ i ] + alpha * x[ i ]; } template< typename Vector1, typename Vector2, typename Vector3 > template< typename Vector1, typename Vector2, typename Vector3, typename Scalar1, typename Scalar2, typename Scalar3 > void VectorOperations< Devices::Host >:: addVectors( Vector1& v, const Vector2& v1, const typename Vector2::RealType& multiplicator1, const Scalar1 multiplicator1, const Vector3& v2, const typename Vector3::RealType& multiplicator2, const typename Vector1::RealType& thisMultiplicator ) const Scalar2 multiplicator2, const Scalar3 thisMultiplicator ) { typedef typename Vector1::IndexType Index; Loading
src/TNL/Containers/Algorithms/VectorOperationsMIC_impl.h +22 −24 File changed.Preview size limit exceeded, changes collapsed. Show changes