Commit 2a153046 authored by Jakub Klinkovský
Browse files

Changed return type of reduction methods from bool to void

parent 53b18a91
Loading
Loading
Loading
Loading
+4 −4
Original line number Diff line number Diff line
@@ -30,7 +30,7 @@ class Multireduction< Devices::Cuda >
{
public:
   template< typename Operation, typename Index >
   static bool
   static void
   reduce( Operation& operation,
           const int n,
           const Index size,
@@ -45,7 +45,7 @@ class Multireduction< Devices::Host >
{
public:
   template< typename Operation, typename Index >
   static bool
   static void
   reduce( Operation& operation,
           const int n,
           const Index size,
@@ -60,7 +60,7 @@ class Multireduction< Devices::MIC >
{
public:
   template< typename Operation, typename Index >
   static bool
   static void
   reduce( Operation& operation,
           const int n,
           const Index size,
+7 −11
Original line number Diff line number Diff line
@@ -49,7 +49,7 @@ static constexpr int Multireduction_minGpuDataSize = 256;//65536; //16384;//1024
 *    hostResult: output array of size = n
 */
template< typename Operation, typename Index >
bool
void
Multireduction< Devices::Cuda >::
reduce( Operation& operation,
        const int n,
@@ -80,11 +80,12 @@ reduce( Operation& operation,
         using _DT2 = typename std::conditional< std::is_same< DataType2, void >::value, DataType1, DataType2 >::type;
         _DT2 hostArray2[ Multireduction_minGpuDataSize ];
         ArrayOperations< Devices::Host, Devices::Cuda >::copyMemory( hostArray2, (_DT2*) deviceInput2, size );
         return Multireduction< Devices::Host >::reduce( operation, n, size, hostArray1, ldInput1, hostArray2, hostResult );
         Multireduction< Devices::Host >::reduce( operation, n, size, hostArray1, ldInput1, hostArray2, hostResult );
      }
      else {
         return Multireduction< Devices::Host >::reduce( operation, n, size, hostArray1, ldInput1, (DataType2*) nullptr, hostResult );
         Multireduction< Devices::Host >::reduce( operation, n, size, hostArray1, ldInput1, (DataType2*) nullptr, hostResult );
      }
      return;
   }

   #ifdef CUDA_REDUCTION_PROFILING
@@ -144,7 +145,6 @@ reduce( Operation& operation,
   #endif

   TNL_CHECK_CUDA_DEVICE;
   return true;
#else
   throw Exceptions::CudaSupportMissing();
#endif
@@ -161,7 +161,7 @@ reduce( Operation& operation,
 *    hostResult: output array of size = n
 */
template< typename Operation, typename Index >
bool
void
Multireduction< Devices::Host >::
reduce( Operation& operation,
        const int n,
@@ -247,12 +247,10 @@ reduce( Operation& operation,
#ifdef HAVE_OPENMP
   }
#endif

   return true;
}

template< typename Operation, typename Index >
bool
void
Multireduction< Devices::MIC >::
reduce( Operation& operation,
        const int n,
@@ -265,11 +263,9 @@ reduce( Operation& operation,
   TNL_ASSERT( n > 0, );
   TNL_ASSERT( size <= ldInput1, );

   std::cout << "Not Implemented yet Multireduction< Devices::MIC >::reduce" << std::endl;
   return true;
   throw std::runtime_error("Not Implemented yet Multireduction< Devices::MIC >::reduce");
}


} // namespace Algorithms
} // namespace Containers
} // namespace TNL
+3 −3
Original line number Diff line number Diff line
@@ -30,7 +30,7 @@ class Reduction< Devices::Cuda >
{
public:
   template< typename Operation, typename Index >
   static bool
   static void
   reduce( Operation& operation,
           const Index size,
           const typename Operation::DataType1* deviceInput1,
@@ -43,7 +43,7 @@ class Reduction< Devices::Host >
{
public:
   template< typename Operation, typename Index >
   static bool
   static void
   reduce( Operation& operation,
           const Index size,
           const typename Operation::DataType1* deviceInput1,
@@ -56,7 +56,7 @@ class Reduction< Devices::MIC >
{
public:
   template< typename Operation, typename Index >
   static bool
   static void
   reduce( Operation& operation,
           const Index size,
           const typename Operation::DataType1* deviceInput1,
+8 −10
Original line number Diff line number Diff line
@@ -39,7 +39,7 @@ namespace Algorithms {
static constexpr int Reduction_minGpuDataSize = 256;//65536; //16384;//1024;//256;

template< typename Operation, typename Index >
bool
void
Reduction< Devices::Cuda >::
reduce( Operation& operation,
        const Index size,
@@ -75,11 +75,12 @@ reduce( Operation& operation,
         using _DT2 = typename std::conditional< std::is_same< DataType2, void >::value, DataType1, DataType2 >::type;
         typename std::remove_const< _DT2 >::type hostArray2[ Reduction_minGpuDataSize ];
         ArrayOperations< Devices::Host, Devices::Cuda >::copyMemory( hostArray2, (_DT2*) deviceInput2, size );
         return Reduction< Devices::Host >::reduce( operation, size, hostArray1, hostArray2, result );
         Reduction< Devices::Host >::reduce( operation, size, hostArray1, hostArray2, result );
      }
      else {
         return Reduction< Devices::Host >::reduce( operation, size, hostArray1, (DataType2*) nullptr, result );
         Reduction< Devices::Host >::reduce( operation, size, hostArray1, (DataType2*) nullptr, result );
      }
      return;
   }

   #ifdef CUDA_REDUCTION_PROFILING
@@ -160,14 +161,13 @@ reduce( Operation& operation,
   }

   TNL_CHECK_CUDA_DEVICE;
   return true;
#else
   throw Exceptions::CudaSupportMissing();
#endif
};

template< typename Operation, typename Index >
bool
void
Reduction< Devices::Host >::
reduce( Operation& operation,
        const Index size,
@@ -224,8 +224,6 @@ reduce( Operation& operation,
#ifdef HAVE_OPENMP
   }
#endif

   return true;
}

} // namespace Algorithms
+4 −12
Original line number Diff line number Diff line
@@ -398,18 +398,14 @@ hauseholder_generate( DeviceVector& Y,
      // aux = Y_{i-1}^T * y_i
      RealType aux[ i ];
      Containers::Algorithms::ParallelReductionScalarProduct< RealType, RealType > scalarProduct;
      if( ! Containers::Algorithms::Multireduction< DeviceType >::reduce
      Containers::Algorithms::Multireduction< DeviceType >::reduce
               ( scalarProduct,
                 i,
                 size,
                 Y.getData(),
                 ldSize,
                 y_i.getData(),
                 aux ) )
      {
         std::cerr << "multireduction failed" << std::endl;
         throw 1;
      }
                 aux );

      // [T_i]_{0..i-1} = - T_{i-1} * t_i * aux
      for( int k = 0; k < i; k++ ) {
@@ -497,18 +493,14 @@ hauseholder_cwy_transposed( DeviceVector& z,
   // aux = Y_i^T * w
   RealType aux[ i + 1 ];
   Containers::Algorithms::ParallelReductionScalarProduct< RealType, RealType > scalarProduct;
   if( ! Containers::Algorithms::Multireduction< DeviceType >::reduce
   Containers::Algorithms::Multireduction< DeviceType >::reduce
            ( scalarProduct,
              i + 1,
              size,
              Y.getData(),
              ldSize,
              w.getData(),
              aux ) )
   {
      std::cerr << "multireduction failed" << std::endl;
      throw 1;
   }
              aux );

   // aux = T_i^T * aux
   // Note that T_i^T is lower triangular, so we can overwrite the aux vector with the result in place