Commit 0813dd89 authored by Jakub Klinkovský's avatar Jakub Klinkovský
Browse files

Fixed scalar types in vector operations

This allows int-vectors to be multiplied by float-scalars, producing
accurate int values (e.g. 4.2 * 10 == 42).
parent 9629ba95
Loading
Loading
Loading
Loading
+2 −2
Original line number Diff line number Diff line
@@ -46,7 +46,7 @@ void export_Vector(py::module & m, const char* name)
        .def("differenceLpNorm", &VectorType::template differenceLpNorm<double, VectorType, double>)
        .def("differenceSum", &VectorType::template differenceSum<double, VectorType>)
        .def("scalarProduct", &VectorType::template scalarProduct<VectorType>)
        .def("addVector", &VectorType::template addVector<VectorType>)
        .def("addVectors", &VectorType::template addVectors<VectorType>)
        .def("addVector", &VectorType::template addVector<VectorType, double, double>)
        .def("addVectors", &VectorType::template addVectors<VectorType, VectorType, double, double, double>)
    ;
}
+45 −51
Original line number Diff line number Diff line
@@ -30,11 +30,11 @@ public:
                           const typename Vector::IndexType i,
                           const typename Vector::RealType& value );

   template< typename Vector >
   template< typename Vector, typename Scalar >
   static void addElement( Vector& v,
                           const typename Vector::IndexType i,
                           const typename Vector::RealType& value,
                           const typename Vector::RealType& thisElementMultiplicator );
                           const Scalar thisElementMultiplicator );

   template< typename Vector, typename ResultType = typename Vector::RealType >
   static ResultType getVectorMax( const Vector& v );
@@ -54,9 +54,8 @@ public:
   template< typename Vector, typename ResultType = typename Vector::RealType >
   static ResultType getVectorL2Norm( const Vector& v );

   template< typename Vector, typename ResultType = typename Vector::RealType, typename Real_ >
   static ResultType getVectorLpNorm( const Vector& v,
                                      const Real_ p );
   template< typename Vector, typename ResultType = typename Vector::RealType, typename Scalar >
   static ResultType getVectorLpNorm( const Vector& v, const Scalar p );

   template< typename Vector, typename ResultType = typename Vector::RealType >
   static ResultType getVectorSum( const Vector& v );
@@ -79,32 +78,31 @@ public:
   template< typename Vector1, typename Vector2, typename ResultType = typename Vector1::RealType >
   static ResultType getVectorDifferenceL2Norm( const Vector1& v1, const Vector2& v2 );

   template< typename Vector1, typename Vector2, typename ResultType = typename Vector1::RealType, typename Real_ >
   static ResultType getVectorDifferenceLpNorm( const Vector1& v1, const Vector2& v2, const Real_ p );
   template< typename Vector1, typename Vector2, typename ResultType = typename Vector1::RealType, typename Scalar >
   static ResultType getVectorDifferenceLpNorm( const Vector1& v1, const Vector2& v2, const Scalar p );

   template< typename Vector1, typename Vector2, typename ResultType = typename Vector1::RealType >
   static ResultType getVectorDifferenceSum( const Vector1& v1, const Vector2& v2 );

   template< typename Vector >
   static void vectorScalarMultiplication( Vector& v,
                                           const typename Vector::RealType& alpha );
   template< typename Vector, typename Scalar >
   static void vectorScalarMultiplication( Vector& v, Scalar alpha );

   template< typename Vector1, typename Vector2, typename ResultType = typename Vector1::RealType >
   static ResultType getScalarProduct( const Vector1& v1, const Vector2& v2 );

   template< typename Vector1, typename Vector2 >
   template< typename Vector1, typename Vector2, typename Scalar1, typename Scalar2 >
   static void addVector( Vector1& y,
                          const Vector2& x,
                          const typename Vector2::RealType& alpha,
                          const typename Vector1::RealType& thisMultiplicator = 1.0 );
                          const Scalar1 alpha,
                          const Scalar2 thisMultiplicator = 1.0 );

   template< typename Vector1, typename Vector2, typename Vector3 >
   template< typename Vector1, typename Vector2, typename Vector3, typename Scalar1, typename Scalar2, typename Scalar3 >
   static void addVectors( Vector1& v,
                           const Vector2& v1,
                           const typename Vector2::RealType& multiplicator1,
                           const Scalar1 multiplicator1,
                           const Vector3& v2,
                           const typename Vector3::RealType& multiplicator2,
                           const typename Vector1::RealType& thisMultiplicator = 1.0 );
                           const Scalar2 multiplicator2,
                           const Scalar3 thisMultiplicator = 1.0 );

   template< typename Vector >
   static void computePrefixSum( Vector& v,
@@ -126,11 +124,11 @@ public:
                           const typename Vector::IndexType i,
                           const typename Vector::RealType& value );

   template< typename Vector >
   template< typename Vector, typename Scalar >
   static void addElement( Vector& v,
                           const typename Vector::IndexType i,
                           const typename Vector::RealType& value,
                           const typename Vector::RealType& thisElementMultiplicator );
                           const Scalar thisElementMultiplicator );

   template< typename Vector, typename ResultType = typename Vector::RealType >
   static ResultType getVectorMax( const Vector& v );
@@ -150,9 +148,8 @@ public:
   template< typename Vector, typename ResultType = typename Vector::RealType >
   static ResultType getVectorL2Norm( const Vector& v );

   template< typename Vector, typename ResultType = typename Vector::RealType, typename Real_ >
   static ResultType getVectorLpNorm( const Vector& v,
                                      const Real_ p );
   template< typename Vector, typename ResultType = typename Vector::RealType, typename Scalar >
   static ResultType getVectorLpNorm( const Vector& v, const Scalar p );

   template< typename Vector, typename ResultType = typename Vector::RealType >
   static ResultType getVectorSum( const Vector& v );
@@ -175,32 +172,31 @@ public:
   template< typename Vector1, typename Vector2, typename ResultType = typename Vector1::RealType >
   static ResultType getVectorDifferenceL2Norm( const Vector1& v1, const Vector2& v2 );

   template< typename Vector1, typename Vector2, typename ResultType = typename Vector1::RealType, typename Real_ >
   static ResultType getVectorDifferenceLpNorm( const Vector1& v1, const Vector2& v2, const Real_ p );
   template< typename Vector1, typename Vector2, typename ResultType = typename Vector1::RealType, typename Scalar >
   static ResultType getVectorDifferenceLpNorm( const Vector1& v1, const Vector2& v2, const Scalar p );

   template< typename Vector1, typename Vector2, typename ResultType = typename Vector1::RealType >
   static ResultType getVectorDifferenceSum( const Vector1& v1, const Vector2& v2 );

   template< typename Vector >
   static void vectorScalarMultiplication( Vector& v,
                                           const typename Vector::RealType& alpha );
   template< typename Vector, typename Scalar >
   static void vectorScalarMultiplication( Vector& v, const Scalar alpha );

   template< typename Vector1, typename Vector2, typename ResultType = typename Vector1::RealType >
   static ResultType getScalarProduct( const Vector1& v1, const Vector2& v2 );

   template< typename Vector1, typename Vector2 >
   template< typename Vector1, typename Vector2, typename Scalar1, typename Scalar2 >
   static void addVector( Vector1& y,
                          const Vector2& x,
                          const typename Vector2::RealType& alpha,
                          const typename Vector1::RealType& thisMultiplicator = 1.0 );
                          const Scalar1 alpha,
                          const Scalar2 thisMultiplicator = 1.0 );

   template< typename Vector1, typename Vector2, typename Vector3 >
   template< typename Vector1, typename Vector2, typename Vector3, typename Scalar1, typename Scalar2, typename Scalar3 >
   static void addVectors( Vector1& v,
                           const Vector2& v1,
                           const typename Vector2::RealType& multiplicator1,
                           const Scalar1 multiplicator1,
                           const Vector3& v2,
                           const typename Vector3::RealType& multiplicator2,
                           const typename Vector1::RealType& thisMultiplicator = 1.0 );
                           const Scalar2 multiplicator2,
                           const Scalar3 thisMultiplicator = 1.0 );

   template< typename Vector >
   static void computePrefixSum( Vector& v,
@@ -223,11 +219,11 @@ public:
                           const typename Vector::IndexType i,
                           const typename Vector::RealType& value );

   template< typename Vector >
   template< typename Vector, typename Scalar >
   static void addElement( Vector& v,
                           const typename Vector::IndexType i,
                           const typename Vector::RealType& value,
                           const typename Vector::RealType& thisElementMultiplicator );
                           const Scalar thisElementMultiplicator );

   template< typename Vector, typename ResultType = typename Vector::RealType >
   static ResultType getVectorMax( const Vector& v );
@@ -247,9 +243,8 @@ public:
   template< typename Vector, typename ResultType = typename Vector::RealType >
   static ResultType getVectorL2Norm( const Vector& v );

   template< typename Vector, typename ResultType = typename Vector::RealType, typename Real_ >
   static ResultType getVectorLpNorm( const Vector& v,
                                      const Real_ p );
   template< typename Vector, typename ResultType = typename Vector::RealType, typename Scalar >
   static ResultType getVectorLpNorm( const Vector& v, const Scalar p );

   template< typename Vector, typename ResultType = typename Vector::RealType >
   static ResultType getVectorSum( const Vector& v );
@@ -272,32 +267,31 @@ public:
   template< typename Vector1, typename Vector2, typename ResultType = typename Vector1::RealType >
   static ResultType getVectorDifferenceL2Norm( const Vector1& v1, const Vector2& v2 );

   template< typename Vector1, typename Vector2, typename ResultType = typename Vector1::RealType, typename Real_ >
   static ResultType getVectorDifferenceLpNorm( const Vector1& v1, const Vector2& v2, const Real_ p );
   template< typename Vector1, typename Vector2, typename ResultType = typename Vector1::RealType, typename Scalar >
   static ResultType getVectorDifferenceLpNorm( const Vector1& v1, const Vector2& v2, const Scalar p );

   template< typename Vector1, typename Vector2, typename ResultType = typename Vector1::RealType >
   static ResultType getVectorDifferenceSum( const Vector1& v1, const Vector2& v2 );

   template< typename Vector >
   static void vectorScalarMultiplication( Vector& v,
                                           const typename Vector::RealType& alpha );
   template< typename Vector, typename Scalar >
   static void vectorScalarMultiplication( Vector& v, const Scalar alpha );

   template< typename Vector1, typename Vector2, typename ResultType = typename Vector1::RealType >
   static ResultType getScalarProduct( const Vector1& v1, const Vector2& v2 );

   template< typename Vector1, typename Vector2 >
   template< typename Vector1, typename Vector2, typename Scalar1, typename Scalar2 >
   static void addVector( Vector1& y,
                          const Vector2& x,
                          const typename Vector2::RealType& alpha,
                          const typename Vector1::RealType& thisMultiplicator = 1.0 );
                          const Scalar1 alpha,
                          const Scalar2 thisMultiplicator = 1.0 );

   template< typename Vector1, typename Vector2, typename Vector3 >
   template< typename Vector1, typename Vector2, typename Vector3, typename Scalar1, typename Scalar2, typename Scalar3 >
   static void addVectors( Vector1& v,
                           const Vector2& v1,
                           const typename Vector2::RealType& multiplicator1,
                           const Scalar1 multiplicator1,
                           const Vector3& v2,
                           const typename Vector3::RealType& multiplicator2,
                           const typename Vector1::RealType& thisMultiplicator = 1.0 );
                           const Scalar2 multiplicator2,
                           const Scalar3 thisMultiplicator = 1.0 );

   template< typename Vector >
   static void computePrefixSum( Vector& v,
+33 −35
Original line number Diff line number Diff line
@@ -29,13 +29,13 @@ addElement( Vector& v,
   v[ i ] += value;
}

template< typename Vector >
template< typename Vector, typename Scalar >
void
VectorOperations< Devices::Cuda >::
addElement( Vector& v,
            const typename Vector::IndexType i,
            const typename Vector::RealType& value,
            const typename Vector::RealType& thisElementMultiplicator )
            const Scalar thisElementMultiplicator )
{
   v[ i ] = thisElementMultiplicator * v[ i ] + value;
}
@@ -137,11 +137,11 @@ getVectorL2Norm( const Vector& v )
   return std::sqrt( result );
}

template< typename Vector, typename ResultType, typename Real_ >
template< typename Vector, typename ResultType, typename Scalar >
ResultType
VectorOperations< Devices::Cuda >::
getVectorLpNorm( const Vector& v,
                 const Real_ p )
                 const Scalar p )
{
   typedef typename Vector::RealType Real;

@@ -153,7 +153,7 @@ getVectorLpNorm( const Vector& v,
   if( p == 2 )
      return getVectorL2Norm< Vector, ResultType >( v );

   Algorithms::ParallelReductionLpNorm< Real, ResultType, Real_ > operation;
   Algorithms::ParallelReductionLpNorm< Real, ResultType, Scalar > operation;
   operation.setPower( p );
   const ResultType result = Reduction< Devices::Cuda >::reduce( operation,
                                                                 v.getSize(),
@@ -276,12 +276,12 @@ getVectorDifferenceL2Norm( const Vector1& v1,
   return std::sqrt( result );
}

template< typename Vector1, typename Vector2, typename ResultType, typename Real_ >
template< typename Vector1, typename Vector2, typename ResultType, typename Scalar >
ResultType
VectorOperations< Devices::Cuda >::
getVectorDifferenceLpNorm( const Vector1& v1,
                           const Vector2& v2,
                           const Real_ p )
                           const Scalar p )
{
   TNL_ASSERT_GT( v1.getSize(), 0, "Vector size must be positive." );
   TNL_ASSERT_EQ( v1.getSize(), v2.getSize(), "The vector sizes must be the same." );
@@ -292,7 +292,7 @@ getVectorDifferenceLpNorm( const Vector1& v1,
   if( p == 2.0 )
      return getVectorDifferenceL2Norm< Vector1, Vector2, ResultType >( v1, v2 );

   Algorithms::ParallelReductionDiffLpNorm< typename Vector1::RealType, typename Vector2::RealType, ResultType, Real_ > operation;
   Algorithms::ParallelReductionDiffLpNorm< typename Vector1::RealType, typename Vector2::RealType, ResultType, Scalar > operation;
   operation.setPower( p );
   const ResultType result = Reduction< Devices::Cuda >::reduce( operation,
                                                                 v1.getSize(),
@@ -318,11 +318,11 @@ getVectorDifferenceSum( const Vector1& v1,
}

#ifdef HAVE_CUDA
template< typename Real, typename Index >
template< typename Real, typename Index, typename Scalar >
__global__ void
vectorScalarMultiplicationCudaKernel( Real* data,
                                      Index size,
                                      Real alpha )
                                      Scalar alpha )
{
   Index elementIdx = blockDim.x * blockIdx.x + threadIdx.x;
   const Index maxGridSize = blockDim.x * gridDim.x;
@@ -334,11 +334,11 @@ vectorScalarMultiplicationCudaKernel( Real* data,
}
#endif

template< typename Vector >
template< typename Vector, typename Scalar >
void
VectorOperations< Devices::Cuda >::
vectorScalarMultiplication( Vector& v,
                            const typename Vector::RealType& alpha )
                            const Scalar alpha )
{
   TNL_ASSERT_GT( v.getSize(), 0, "Vector size must be positive." );

@@ -376,14 +376,13 @@ getScalarProduct( const Vector1& v1,
}

#ifdef HAVE_CUDA
template< typename Real,
          typename Index >
template< typename Real1, typename Real2, typename Index, typename Scalar1, typename Scalar2 >
__global__ void
vectorAddVectorCudaKernel( Real* y,
                           const Real* x,
vectorAddVectorCudaKernel( Real1* y,
                           const Real2* x,
                           const Index size,
                           const Real alpha,
                           const Real thisMultiplicator )
                           const Scalar1 alpha,
                           const Scalar2 thisMultiplicator )
{
   Index elementIdx = blockDim.x * blockIdx.x + threadIdx.x;
   const Index maxGridSize = blockDim.x * gridDim.x;
@@ -402,13 +401,13 @@ vectorAddVectorCudaKernel( Real* y,
}
#endif

template< typename Vector1, typename Vector2 >
template< typename Vector1, typename Vector2, typename Scalar1, typename Scalar2 >
void
VectorOperations< Devices::Cuda >::
addVector( Vector1& y,
           const Vector2& x,
           const typename Vector2::RealType& alpha,
           const typename Vector1::RealType& thisMultiplicator )
           const Scalar1 alpha,
           const Scalar2 thisMultiplicator )
{
   TNL_ASSERT_GT( x.getSize(), 0, "Vector size must be positive." );
   TNL_ASSERT_EQ( x.getSize(), y.getSize(), "The vector sizes must be the same." );
@@ -435,16 +434,16 @@ addVector( Vector1& y,
}

#ifdef HAVE_CUDA
template< typename Real,
          typename Index >
template< typename Real1, typename Real2, typename Real3, typename Index,
          typename Scalar1, typename Scalar2, typename Scalar3 >
__global__ void
vectorAddVectorsCudaKernel( Real* v,
                            const Real* v1,
                            const Real* v2,
vectorAddVectorsCudaKernel( Real1* v,
                            const Real2* v1,
                            const Real3* v2,
                            const Index size,
                            const Real multiplicator1,
                            const Real multiplicator2,
                            const Real thisMultiplicator )
                            const Scalar1 multiplicator1,
                            const Scalar2 multiplicator2,
                            const Scalar3 thisMultiplicator )
{
   Index elementIdx = blockDim.x * blockIdx.x + threadIdx.x;
   const Index maxGridSize = blockDim.x * gridDim.x;
@@ -466,17 +465,16 @@ vectorAddVectorsCudaKernel( Real* v,
}
#endif

template< typename Vector1,
          typename Vector2,
          typename Vector3 >
template< typename Vector1, typename Vector2, typename Vector3,
          typename Scalar1, typename Scalar2, typename Scalar3 >
void
VectorOperations< Devices::Cuda >::
addVectors( Vector1& v,
            const Vector2& v1,
            const typename Vector2::RealType& multiplicator1,
            const Scalar1 multiplicator1,
            const Vector3& v2,
            const typename Vector3::RealType& multiplicator2,
            const typename Vector1::RealType& thisMultiplicator )
            const Scalar2 multiplicator2,
            const Scalar3 thisMultiplicator )
{
   TNL_ASSERT_GT( v.getSize(), 0, "Vector size must be positive." );
   TNL_ASSERT_EQ( v.getSize(), v1.getSize(), "The vector sizes must be the same." );
+19 −20
Original line number Diff line number Diff line
@@ -30,13 +30,13 @@ addElement( Vector& v,
   v[ i ] += value;
}

template< typename Vector >
template< typename Vector, typename Scalar >
void
VectorOperations< Devices::Host >::
addElement( Vector& v,
            const typename Vector::IndexType i,
            const typename Vector::RealType& value,
            const typename Vector::RealType& thisElementMultiplicator )
            const Scalar thisElementMultiplicator )
{
   v[ i ] = thisElementMultiplicator * v[ i ] + value;
}
@@ -139,11 +139,11 @@ getVectorL2Norm( const Vector& v )
   return std::sqrt( result );
}

template< typename Vector, typename ResultType, typename Real_ >
template< typename Vector, typename ResultType, typename Scalar >
ResultType
VectorOperations< Devices::Host >::
getVectorLpNorm( const Vector& v,
                 const Real_ p )
                 const Scalar p )
{
   typedef typename Vector::RealType Real;

@@ -155,7 +155,7 @@ getVectorLpNorm( const Vector& v,
   if( p == 2.0 )
      return getVectorL2Norm< Vector, ResultType >( v );

   Algorithms::ParallelReductionLpNorm< Real, ResultType, Real_ > operation;
   Algorithms::ParallelReductionLpNorm< Real, ResultType, Scalar > operation;
   operation.setPower( p );
   const ResultType result = Reduction< Devices::Host >::reduce( operation,
                                                                 v.getSize(),
@@ -278,12 +278,12 @@ getVectorDifferenceL2Norm( const Vector1& v1,
}


template< typename Vector1, typename Vector2, typename ResultType, typename Real_ >
template< typename Vector1, typename Vector2, typename ResultType, typename Scalar >
ResultType
VectorOperations< Devices::Host >::
getVectorDifferenceLpNorm( const Vector1& v1,
                           const Vector2& v2,
                           const Real_ p )
                           const Scalar p )
{
   TNL_ASSERT_GT( v1.getSize(), 0, "Vector size must be positive." );
   TNL_ASSERT_EQ( v1.getSize(), v2.getSize(), "The vector sizes must be the same." );
@@ -294,7 +294,7 @@ getVectorDifferenceLpNorm( const Vector1& v1,
   if( p == 2.0 )
      return getVectorDifferenceL2Norm< Vector1, Vector2, ResultType >( v1, v2 );

   Algorithms::ParallelReductionDiffLpNorm< typename Vector1::RealType, typename Vector2::RealType, ResultType, Real_ > operation;
   Algorithms::ParallelReductionDiffLpNorm< typename Vector1::RealType, typename Vector2::RealType, ResultType, Scalar > operation;
   operation.setPower( p );
   const ResultType result = Reduction< Devices::Host >::reduce( operation,
                                                                 v1.getSize(),
@@ -320,11 +320,11 @@ getVectorDifferenceSum( const Vector1& v1,
}


template< typename Vector >
template< typename Vector, typename Scalar >
void
VectorOperations< Devices::Host >::
vectorScalarMultiplication( Vector& v,
                            const typename Vector::RealType& alpha )
                            const Scalar alpha )
{
   typedef typename Vector::IndexType Index;

@@ -355,13 +355,13 @@ getScalarProduct( const Vector1& v1,
                                              v2.getData() );
}

template< typename Vector1, typename Vector2 >
template< typename Vector1, typename Vector2, typename Scalar1, typename Scalar2 >
void
VectorOperations< Devices::Host >::
addVector( Vector1& y,
           const Vector2& x,
           const typename Vector2::RealType& alpha,
           const typename Vector1::RealType& thisMultiplicator )
           const Scalar1 alpha,
           const Scalar2 thisMultiplicator )
{
   typedef typename Vector1::IndexType Index;

@@ -384,17 +384,16 @@ addVector( Vector1& y,
         y[ i ] = thisMultiplicator * y[ i ] + alpha * x[ i ];
}

template< typename Vector1,
          typename Vector2,
          typename Vector3 >
template< typename Vector1, typename Vector2, typename Vector3,
          typename Scalar1, typename Scalar2, typename Scalar3 >
void
VectorOperations< Devices::Host >::
addVectors( Vector1& v,
            const Vector2& v1,
            const typename Vector2::RealType& multiplicator1,
            const Scalar1 multiplicator1,
            const Vector3& v2,
            const typename Vector3::RealType& multiplicator2,
            const typename Vector1::RealType& thisMultiplicator )
            const Scalar2 multiplicator2,
            const Scalar3 thisMultiplicator )
{
   typedef typename Vector1::IndexType Index;

+22 −24

File changed.

Preview size limit exceeded, changes collapsed.

Loading