Changed return type of reduction methods from bool to void (2a153046) · Commits · TNL / tnl-dev

src/TNL/Containers/Algorithms/Multireduction.h

+4 −4

Original line number	Diff line number	Diff line
		@@ -30,7 +30,7 @@ class Multireduction< Devices::Cuda >
		{
		public:
		template< typename Operation, typename Index >
		static bool
		static void
		reduce( Operation& operation,
		const int n,
		const Index size,
		@@ -45,7 +45,7 @@ class Multireduction< Devices::Host >
		{
		public:
		template< typename Operation, typename Index >
		static bool
		static void
		reduce( Operation& operation,
		const int n,
		const Index size,
		@@ -60,7 +60,7 @@ class Multireduction< Devices::MIC >
		{
		public:
		template< typename Operation, typename Index >
		static bool
		static void
		reduce( Operation& operation,
		const int n,
		const Index size,

src/TNL/Containers/Algorithms/Multireduction_impl.h

+7 −11

Original line number	Diff line number	Diff line
		@@ -49,7 +49,7 @@ static constexpr int Multireduction_minGpuDataSize = 256;//65536; //16384;//1024
		* hostResult: output array of size = n
		*/
		template< typename Operation, typename Index >
		bool
		void
		Multireduction< Devices::Cuda >::
		reduce( Operation& operation,
		const int n,
		@@ -80,11 +80,12 @@ reduce( Operation& operation,
		using _DT2 = typename std::conditional< std::is_same< DataType2, void >::value, DataType1, DataType2 >::type;
		_DT2 hostArray2[ Multireduction_minGpuDataSize ];
		ArrayOperations< Devices::Host, Devices::Cuda >::copyMemory( hostArray2, (_DT2*) deviceInput2, size );
		return Multireduction< Devices::Host >::reduce( operation, n, size, hostArray1, ldInput1, hostArray2, hostResult );
		Multireduction< Devices::Host >::reduce( operation, n, size, hostArray1, ldInput1, hostArray2, hostResult );
		}
		else {
		return Multireduction< Devices::Host >::reduce( operation, n, size, hostArray1, ldInput1, (DataType2*) nullptr, hostResult );
		Multireduction< Devices::Host >::reduce( operation, n, size, hostArray1, ldInput1, (DataType2*) nullptr, hostResult );
		}
		return;
		}

		#ifdef CUDA_REDUCTION_PROFILING
		@@ -144,7 +145,6 @@ reduce( Operation& operation,
		#endif

		TNL_CHECK_CUDA_DEVICE;
		return true;
		#else
		throw Exceptions::CudaSupportMissing();
		#endif
		@@ -161,7 +161,7 @@ reduce( Operation& operation,
		* hostResult: output array of size = n
		*/
		template< typename Operation, typename Index >
		bool
		void
		Multireduction< Devices::Host >::
		reduce( Operation& operation,
		const int n,
		@@ -247,12 +247,10 @@ reduce( Operation& operation,
		#ifdef HAVE_OPENMP
		}
		#endif

		return true;
		}

		template< typename Operation, typename Index >
		bool
		void
		Multireduction< Devices::MIC >::
		reduce( Operation& operation,
		const int n,
		@@ -265,11 +263,9 @@ reduce( Operation& operation,
		TNL_ASSERT( n > 0, );
		TNL_ASSERT( size <= ldInput1, );

		std::cout << "Not Implemented yet Multireduction< Devices::MIC >::reduce" << std::endl;
		return true;
		throw std::runtime_error("Not Implemented yet Multireduction< Devices::MIC >::reduce");
		}


		} // namespace Algorithms
		} // namespace Containers
		} // namespace TNL

src/TNL/Containers/Algorithms/Reduction.h

+3 −3

Original line number	Diff line number	Diff line
		@@ -30,7 +30,7 @@ class Reduction< Devices::Cuda >
		{
		public:
		template< typename Operation, typename Index >
		static bool
		static void
		reduce( Operation& operation,
		const Index size,
		const typename Operation::DataType1* deviceInput1,
		@@ -43,7 +43,7 @@ class Reduction< Devices::Host >
		{
		public:
		template< typename Operation, typename Index >
		static bool
		static void
		reduce( Operation& operation,
		const Index size,
		const typename Operation::DataType1* deviceInput1,
		@@ -56,7 +56,7 @@ class Reduction< Devices::MIC >
		{
		public:
		template< typename Operation, typename Index >
		static bool
		static void
		reduce( Operation& operation,
		const Index size,
		const typename Operation::DataType1* deviceInput1,

src/TNL/Containers/Algorithms/Reduction_impl.h

+8 −10

Original line number	Diff line number	Diff line
		@@ -39,7 +39,7 @@ namespace Algorithms {
		static constexpr int Reduction_minGpuDataSize = 256;//65536; //16384;//1024;//256;

		template< typename Operation, typename Index >
		bool
		void
		Reduction< Devices::Cuda >::
		reduce( Operation& operation,
		const Index size,
		@@ -75,11 +75,12 @@ reduce( Operation& operation,
		using _DT2 = typename std::conditional< std::is_same< DataType2, void >::value, DataType1, DataType2 >::type;
		typename std::remove_const< _DT2 >::type hostArray2[ Reduction_minGpuDataSize ];
		ArrayOperations< Devices::Host, Devices::Cuda >::copyMemory( hostArray2, (_DT2*) deviceInput2, size );
		return Reduction< Devices::Host >::reduce( operation, size, hostArray1, hostArray2, result );
		Reduction< Devices::Host >::reduce( operation, size, hostArray1, hostArray2, result );
		}
		else {
		return Reduction< Devices::Host >::reduce( operation, size, hostArray1, (DataType2*) nullptr, result );
		Reduction< Devices::Host >::reduce( operation, size, hostArray1, (DataType2*) nullptr, result );
		}
		return;
		}

		#ifdef CUDA_REDUCTION_PROFILING
		@@ -160,14 +161,13 @@ reduce( Operation& operation,
		}

		TNL_CHECK_CUDA_DEVICE;
		return true;
		#else
		throw Exceptions::CudaSupportMissing();
		#endif
		};

		template< typename Operation, typename Index >
		bool
		void
		Reduction< Devices::Host >::
		reduce( Operation& operation,
		const Index size,
		@@ -224,8 +224,6 @@ reduce( Operation& operation,
		#ifdef HAVE_OPENMP
		}
		#endif

		return true;
		}

		} // namespace Algorithms

src/TNL/Solvers/Linear/CWYGMRES_impl.h

+4 −12

Original line number	Diff line number	Diff line
		@@ -398,18 +398,14 @@ hauseholder_generate( DeviceVector& Y,
		// aux = Y_{i-1}^T * y_i
		RealType aux[ i ];
		Containers::Algorithms::ParallelReductionScalarProduct< RealType, RealType > scalarProduct;
		if( ! Containers::Algorithms::Multireduction< DeviceType >::reduce
		Containers::Algorithms::Multireduction< DeviceType >::reduce
		( scalarProduct,
		i,
		size,
		Y.getData(),
		ldSize,
		y_i.getData(),
		aux ) )
		{
		std::cerr << "multireduction failed" << std::endl;
		throw 1;
		}
		aux );

		// [T_i]_{0..i-1} = - T_{i-1} * t_i * aux
		for( int k = 0; k < i; k++ ) {
		@@ -497,18 +493,14 @@ hauseholder_cwy_transposed( DeviceVector& z,
		// aux = Y_i^T * w
		RealType aux[ i + 1 ];
		Containers::Algorithms::ParallelReductionScalarProduct< RealType, RealType > scalarProduct;
		if( ! Containers::Algorithms::Multireduction< DeviceType >::reduce
		Containers::Algorithms::Multireduction< DeviceType >::reduce
		( scalarProduct,
		i + 1,
		size,
		Y.getData(),
		ldSize,
		w.getData(),
		aux ) )
		{
		std::cerr << "multireduction failed" << std::endl;
		throw 1;
		}
		aux );

		// aux = T_i^T * aux
		// Note that T_i^T is lower triangular, so we can overwrite the aux vector with the result in place