Merge branch 'reduction' into 'develop' (6f291b2b) · Commits · TNL / tnl-dev

src/TNL/Containers/Algorithms/ArrayAssignment.h

+0 −2

Original line number	Diff line number	Diff line
		@@ -86,8 +86,6 @@ struct ArrayAssignment< Array, T, false >

		};



		} // namespace Algorithms
		} // namespace Containers
		} // namespace TNL

src/TNL/Containers/Algorithms/ArrayOperationsCuda.hpp

+16 −9

Original line number	Diff line number	Diff line
		@@ -193,7 +193,6 @@ copySTLList( DestinationElement* destination,
		copiedElements += copySize;
		}
		}

		template< typename Element1,
		typename Element2,
		typename Index >
		@@ -205,8 +204,11 @@ compareMemory( const Element1* destination,
		{
		TNL_ASSERT_TRUE( destination, "Attempted to compare data through a nullptr." );
		TNL_ASSERT_TRUE( source, "Attempted to compare data through a nullptr." );
		Algorithms::ParallelReductionEqualities< Element1, Element2 > reductionEqualities;
		return Reduction< Devices::Cuda >::reduce( reductionEqualities, size, destination, source );

		auto fetch = [=] __cuda_callable__ ( Index i ) -> bool { return ( destination[ i ] == source[ i ] ); };
		auto reduction = [=] __cuda_callable__ ( bool& a, const bool& b ) { a &= b; };
		auto volatileReduction = [=] __cuda_callable__ ( volatile bool& a, volatile bool& b ) { a &= b; };
		return Reduction< Devices::Cuda >::reduce( size, reduction, volatileReduction, fetch, true );
		}

		template< typename Element,
		@@ -219,10 +221,12 @@ containsValue( const Element* data,
		{
		TNL_ASSERT_TRUE( data, "Attempted to check data through a nullptr." );
		TNL_ASSERT_GE( size, 0, "" );

		if( size == 0 ) return false;
		Algorithms::ParallelReductionContainsValue< Element > reductionContainsValue;
		reductionContainsValue.setValue( value );
		return Reduction< Devices::Cuda >::reduce( reductionContainsValue, size, data, nullptr );
		auto fetch = [=] __cuda_callable__ ( Index i ) -> bool { return ( data[ i ] == value ); };
		auto reduction = [=] __cuda_callable__ ( bool& a, const bool& b ) { a \|= b; };
		auto volatileReduction = [=] __cuda_callable__ ( volatile bool& a, volatile bool& b ) { a \|= b; };
		return Reduction< Devices::Cuda >::reduce( size, reduction, volatileReduction, fetch, false );
		}

		template< typename Element,
		@@ -236,9 +240,12 @@ containsOnlyValue( const Element* data,
		TNL_ASSERT_TRUE( data, "Attempted to check data through a nullptr." );
		TNL_ASSERT_GE( size, 0, "" );
		if( size == 0 ) return false;
		Algorithms::ParallelReductionContainsOnlyValue< Element > reductionContainsOnlyValue;
		reductionContainsOnlyValue.setValue( value );
		return Reduction< Devices::Cuda >::reduce( reductionContainsOnlyValue, size, data, nullptr );

		if( size == 0 ) return false;
		auto fetch = [=] __cuda_callable__ ( Index i ) -> bool { return ( data[ i ] == value ); };
		auto reduction = [=] __cuda_callable__ ( bool& a, const bool& b ) { a &= b; };
		auto volatileReduction = [=] __cuda_callable__ ( volatile bool& a, volatile bool& b ) { a &= b; };
		return Reduction< Devices::Cuda >::reduce( size, reduction, volatileReduction, fetch, true );
		}

src/TNL/Containers/Algorithms/CommonVectorOperations.h

0 → 100644

+79 −0

Original line number	Diff line number	Diff line
		/***************************************************************************
		CommonVectorOperations.h - description
		-------------------
		begin : Apr 12, 2019
		copyright : (C) 2019 by Tomas Oberhuber
		email : tomas.oberhuber@fjfi.cvut.cz
		***************************************************************************/

		/* See Copyright Notice in tnl/Copyright */

		#pragma once

		namespace TNL {
		namespace Containers {
		namespace Algorithms {

		/**
		 * Collection of static reduction-style vector operations (extrema, norms,
		 * sums, scalar product), parametrized by the device used for the reduction.
		 *
		 * Only the declarations live here; the definitions are in
		 * CommonVectorOperations.hpp, which is included at the end of this header.
		 * Every method takes the result type as a template parameter that defaults
		 * to the RealType of the (first) vector argument.
		 */
		template< typename Device >
		struct CommonVectorOperations
		{
		using DeviceType = Device;

		// Maximum of the elements of v.
		template< typename Vector, typename ResultType = typename Vector::RealType >
		static ResultType getVectorMax( const Vector& v );

		// Minimum of the elements of v.
		template< typename Vector, typename ResultType = typename Vector::RealType >
		static ResultType getVectorMin( const Vector& v );

		// Maximum of the absolute values of the elements of v.
		template< typename Vector, typename ResultType = typename Vector::RealType >
		static ResultType getVectorAbsMax( const Vector& v );

		// Minimum of the absolute values of the elements of v.
		template< typename Vector, typename ResultType = typename Vector::RealType >
		static ResultType getVectorAbsMin( const Vector& v );

		// Sum of the absolute values of the elements of v.
		template< typename Vector, typename ResultType = typename Vector::RealType >
		static ResultType getVectorL1Norm( const Vector& v );

		// Square root of the sum of the squared elements of v.
		template< typename Vector, typename ResultType = typename Vector::RealType >
		static ResultType getVectorL2Norm( const Vector& v );

		// ( sum of |v[i]|^p )^( 1/p ); Scalar is deduced from the argument p.
		template< typename Vector, typename ResultType = typename Vector::RealType, typename Scalar >
		static ResultType getVectorLpNorm( const Vector& v, const Scalar p );

		// Sum of the elements of v.
		template< typename Vector, typename ResultType = typename Vector::RealType >
		static ResultType getVectorSum( const Vector& v );

		// Maximum of the element-wise differences v1[i] - v2[i].
		template< typename Vector1, typename Vector2, typename ResultType = typename Vector1::RealType >
		static ResultType getVectorDifferenceMax( const Vector1& v1, const Vector2& v2 );

		// Minimum of the element-wise differences v1[i] - v2[i].
		template< typename Vector1, typename Vector2, typename ResultType = typename Vector1::RealType >
		static ResultType getVectorDifferenceMin( const Vector1& v1, const Vector2& v2 );

		// Maximum of |v1[i] - v2[i]|.
		template< typename Vector1, typename Vector2, typename ResultType = typename Vector1::RealType >
		static ResultType getVectorDifferenceAbsMax( const Vector1& v1, const Vector2& v2 );

		// Minimum of |v1[i] - v2[i]|.
		template< typename Vector1, typename Vector2, typename ResultType = typename Vector1::RealType >
		static ResultType getVectorDifferenceAbsMin( const Vector1& v1, const Vector2& v2 );

		// Sum of |v1[i] - v2[i]|.
		template< typename Vector1, typename Vector2, typename ResultType = typename Vector1::RealType >
		static ResultType getVectorDifferenceL1Norm( const Vector1& v1, const Vector2& v2 );

		// Square root of the sum of ( v1[i] - v2[i] )^2.
		template< typename Vector1, typename Vector2, typename ResultType = typename Vector1::RealType >
		static ResultType getVectorDifferenceL2Norm( const Vector1& v1, const Vector2& v2 );

		// ( sum of |v1[i] - v2[i]|^p )^( 1/p ); Scalar is deduced from the argument p.
		template< typename Vector1, typename Vector2, typename ResultType = typename Vector1::RealType, typename Scalar >
		static ResultType getVectorDifferenceLpNorm( const Vector1& v1, const Vector2& v2, const Scalar p );

		// Sum of the element-wise differences v1[i] - v2[i].
		template< typename Vector1, typename Vector2, typename ResultType = typename Vector1::RealType >
		static ResultType getVectorDifferenceSum( const Vector1& v1, const Vector2& v2 );

		// Sum of the element-wise products v1[i] * v2[i].
		template< typename Vector1, typename Vector2, typename ResultType = typename Vector1::RealType >
		static ResultType getScalarProduct( const Vector1& v1, const Vector2& v2 );

		};

		} // namespace Algorithms
		} // namespace Containers
		} // namespace TNL

		#include <TNL/Containers/Algorithms/CommonVectorOperations.hpp>

src/TNL/Containers/Algorithms/CommonVectorOperations.hpp

0 → 100644

+375 −0

Original line number	Diff line number	Diff line
		/***************************************************************************
		CommonVectorOperations.hpp - description
		-------------------
		begin : Apr 12, 2019
		copyright : (C) 2019 by Tomas Oberhuber
		email : tomas.oberhuber@fjfi.cvut.cz
		***************************************************************************/

		/* See Copyright Notice in tnl/Copyright */

		#pragma once

		#include <TNL/Containers/Algorithms/CommonVectorOperations.h>
		#include <TNL/Containers/Algorithms/Reduction.h>

		namespace TNL {
		namespace Containers {
		namespace Algorithms {

		/**
		 * Computes the maximum of the elements of v.
		 *
		 * The elements are fetched as ResultType and combined with TNL::max by
		 * Reduction< DeviceType >, starting from numeric_limits< ResultType >::lowest().
		 * The size of v must be positive (checked by assertion).
		 */
		template< typename Device >
		   template< typename Vector, typename ResultType >
		ResultType
		CommonVectorOperations< Device >::
		getVectorMax( const Vector& v )
		{
		   TNL_ASSERT_GT( v.getSize(), 0, "Vector size must be positive." );

		   using IndexType = typename Vector::IndexType;

		   // Capture the raw pointer by value so that the lambdas do not reference the vector object.
		   const auto* data = v.getData();
		   auto fetch = [=] __cuda_callable__ ( IndexType i ) -> ResultType { return data[ i ]; };
		   auto reduction = [=] __cuda_callable__ ( ResultType& a, const ResultType& b ) { a = TNL::max( a, b ); };
		   auto volatileReduction = [=] __cuda_callable__ ( volatile ResultType& a, volatile ResultType& b ) { a = TNL::max( a, b ); };
		   return Reduction< DeviceType >::reduce( v.getSize(), reduction, volatileReduction, fetch, std::numeric_limits< ResultType >::lowest() );
		}

		/**
		 * Computes the minimum of the elements of v.
		 *
		 * The elements are combined with TNL::min by Reduction< DeviceType >,
		 * starting from numeric_limits< ResultType >::max().
		 * The size of v must be positive (checked by assertion).
		 */
		template< typename Device >
		   template< typename Vector, typename ResultType >
		ResultType
		CommonVectorOperations< Device >::
		getVectorMin( const Vector& v )
		{
		   TNL_ASSERT_GT( v.getSize(), 0, "Vector size must be positive." );

		   using IndexType = typename Vector::IndexType;

		   const auto* data = v.getData();
		   // Explicit return type, consistent with getVectorMax and getVectorSum:
		   // the fetched element is converted to ResultType right in the fetcher.
		   auto fetch = [=] __cuda_callable__ ( IndexType i ) -> ResultType { return data[ i ]; };
		   auto reduction = [=] __cuda_callable__ ( ResultType& a, const ResultType& b ) { a = TNL::min( a, b ); };
		   auto volatileReduction = [=] __cuda_callable__ ( volatile ResultType& a, volatile ResultType& b ) { a = TNL::min( a, b ); };
		   return Reduction< DeviceType >::reduce( v.getSize(), reduction, volatileReduction, fetch, std::numeric_limits< ResultType >::max() );
		}

		/**
		 * Computes the maximum of the absolute values of the elements of v.
		 *
		 * Each element is passed through TNL::abs and the results are combined with
		 * TNL::max, starting from numeric_limits< ResultType >::lowest().
		 * The size of v must be positive (checked by assertion).
		 */
		template< typename Device >
		   template< typename Vector, typename ResultType >
		ResultType
		CommonVectorOperations< Device >::
		getVectorAbsMax( const Vector& v )
		{
		   TNL_ASSERT_GT( v.getSize(), 0, "Vector size must be positive." );

		   using IndexType = typename Vector::IndexType;

		   const auto* data = v.getData();
		   // Explicit return type, consistent with getVectorMax and getVectorSum.
		   auto fetch = [=] __cuda_callable__ ( IndexType i ) -> ResultType { return TNL::abs( data[ i ] ); };
		   auto reduction = [=] __cuda_callable__ ( ResultType& a, const ResultType& b ) { a = TNL::max( a, b ); };
		   auto volatileReduction = [=] __cuda_callable__ ( volatile ResultType& a, volatile ResultType& b ) { a = TNL::max( a, b ); };
		   return Reduction< DeviceType >::reduce( v.getSize(), reduction, volatileReduction, fetch, std::numeric_limits< ResultType >::lowest() );
		}

		/**
		 * Computes the minimum of the absolute values of the elements of v.
		 *
		 * Each element is passed through TNL::abs and the results are combined with
		 * TNL::min, starting from numeric_limits< ResultType >::max().
		 * The size of v must be positive (checked by assertion).
		 */
		template< typename Device >
		   template< typename Vector, typename ResultType >
		ResultType
		CommonVectorOperations< Device >::
		getVectorAbsMin( const Vector& v )
		{
		   TNL_ASSERT_GT( v.getSize(), 0, "Vector size must be positive." );

		   using IndexType = typename Vector::IndexType;

		   const auto* data = v.getData();
		   // Explicit return type, consistent with getVectorMax and getVectorSum.
		   auto fetch = [=] __cuda_callable__ ( IndexType i ) -> ResultType { return TNL::abs( data[ i ] ); };
		   auto reduction = [=] __cuda_callable__ ( ResultType& a, const ResultType& b ) { a = TNL::min( a, b ); };
		   auto volatileReduction = [=] __cuda_callable__ ( volatile ResultType& a, volatile ResultType& b ) { a = TNL::min( a, b ); };
		   return Reduction< DeviceType >::reduce( v.getSize(), reduction, volatileReduction, fetch, std::numeric_limits< ResultType >::max() );
		}

		/**
		 * Computes the L1 norm of v, i.e. the sum of TNL::abs( v[ i ] ).
		 *
		 * The summation starts from zero converted to ResultType.
		 * The size of v must be positive (checked by assertion).
		 */
		template< typename Device >
		   template< typename Vector, typename ResultType >
		ResultType
		CommonVectorOperations< Device >::
		getVectorL1Norm( const Vector& v )
		{
		   TNL_ASSERT_GT( v.getSize(), 0, "Vector size must be positive." );

		   using IndexType = typename Vector::IndexType;

		   const auto* data = v.getData();
		   // Explicit return type, consistent with getVectorMax and getVectorSum.
		   auto fetch = [=] __cuda_callable__ ( IndexType i ) -> ResultType { return TNL::abs( data[ i ] ); };
		   auto reduction = [=] __cuda_callable__ ( ResultType& a, const ResultType& b ) { a += b; };
		   auto volatileReduction = [=] __cuda_callable__ ( volatile ResultType& a, volatile ResultType& b ) { a += b; };
		   return Reduction< DeviceType >::reduce( v.getSize(), reduction, volatileReduction, fetch, static_cast< ResultType >( 0 ) );
		}

		/**
		 * Computes the L2 norm of v: the square root of the sum of v[ i ] * v[ i ].
		 *
		 * The square is evaluated in the element type and then converted to
		 * ResultType; the sum starts from zero converted to ResultType.
		 * The size of v must be positive (checked by assertion).
		 */
		template< typename Device >
		   template< typename Vector, typename ResultType >
		ResultType
		CommonVectorOperations< Device >::
		getVectorL2Norm( const Vector& v )
		{
		   TNL_ASSERT_GT( v.getSize(), 0, "Vector size must be positive." );

		   using IndexType = typename Vector::IndexType;

		   const auto* data = v.getData();
		   // Explicit return type, consistent with getVectorMax and getVectorSum.
		   auto fetch = [=] __cuda_callable__ ( IndexType i ) -> ResultType { return data[ i ] * data[ i ]; };
		   auto reduction = [=] __cuda_callable__ ( ResultType& a, const ResultType& b ) { a += b; };
		   auto volatileReduction = [=] __cuda_callable__ ( volatile ResultType& a, volatile ResultType& b ) { a += b; };
		   return std::sqrt( Reduction< DeviceType >::reduce( v.getSize(), reduction, volatileReduction, fetch, static_cast< ResultType >( 0 ) ) );
		}

		/**
		 * Computes the Lp norm of v: ( sum of TNL::abs( v[ i ] )^p )^( 1 / p ).
		 *
		 * The cases p == 1 and p == 2 are forwarded to getVectorL1Norm and
		 * getVectorL2Norm, which avoid the calls to pow.
		 * The size of v must be positive and p must be at least 1 (both checked
		 * by assertions).
		 */
		template< typename Device >
		   template< typename Vector, typename ResultType, typename Scalar >
		ResultType
		CommonVectorOperations< Device >::
		getVectorLpNorm( const Vector& v,
		                 const Scalar p )
		{
		   TNL_ASSERT_GT( v.getSize(), 0, "Vector size must be positive." );
		   TNL_ASSERT_GE( p, 1.0, "Parameter of the L^p norm must be at least 1.0." );

		   // Exact comparison is intended: only the literal values 1 and 2 take the shortcuts.
		   if( p == 1.0 )
		      return getVectorL1Norm< Vector, ResultType >( v );
		   if( p == 2.0 )
		      return getVectorL2Norm< Vector, ResultType >( v );

		   using IndexType = typename Vector::IndexType;

		   const auto* data = v.getData();
		   // Explicit return type, consistent with getVectorMax and getVectorSum.
		   auto fetch = [=] __cuda_callable__ ( IndexType i ) -> ResultType { return TNL::pow( TNL::abs( data[ i ] ), p ); };
		   auto reduction = [=] __cuda_callable__ ( ResultType& a, const ResultType& b ) { a += b; };
		   auto volatileReduction = [=] __cuda_callable__ ( volatile ResultType& a, volatile ResultType& b ) { a += b; };
		   return std::pow( Reduction< DeviceType >::reduce( v.getSize(), reduction, volatileReduction, fetch, static_cast< ResultType >( 0 ) ), 1.0 / p );
		}

		/**
		 * Computes the sum of the elements of v.
		 *
		 * The elements are fetched as ResultType and added up starting from zero
		 * converted to ResultType. The size of v must be positive (checked by
		 * assertion). Calling this with ResultType == bool aborts the program.
		 */
		template< typename Device >
		   template< typename Vector, typename ResultType >
		ResultType
		CommonVectorOperations< Device >::
		getVectorSum( const Vector& v )
		{
		   TNL_ASSERT_GT( v.getSize(), 0, "Vector size must be positive." );

		   // NOTE(review): run-time guard against summing into bool; it looks like a
		   // static_assert could express the same restriction at compile time, but that
		   // would reject code which merely instantiates this function - confirm first.
		   if( std::is_same< ResultType, bool >::value )
		      abort();

		   using IndexType = typename Vector::IndexType;

		   const auto* data = v.getData();
		   auto fetch = [=] __cuda_callable__ ( IndexType i ) -> ResultType { return data[ i ]; };
		   auto reduction = [=] __cuda_callable__ ( ResultType& a, const ResultType& b ) { a += b; };
		   auto volatileReduction = [=] __cuda_callable__ ( volatile ResultType& a, volatile ResultType& b ) { a += b; };
		   return Reduction< DeviceType >::reduce( v.getSize(), reduction, volatileReduction, fetch, static_cast< ResultType >( 0 ) );
		}

		/**
		 * Computes the maximum of the element-wise differences v1[ i ] - v2[ i ].
		 *
		 * The differences are combined with TNL::max, starting from
		 * numeric_limits< ResultType >::lowest(). Both vectors must have the same
		 * positive size (checked by assertions).
		 */
		template< typename Device >
		   template< typename Vector1, typename Vector2, typename ResultType >
		ResultType
		CommonVectorOperations< Device >::
		getVectorDifferenceMax( const Vector1& v1,
		                        const Vector2& v2 )
		{
		   TNL_ASSERT_GT( v1.getSize(), 0, "Vector size must be positive." );
		   TNL_ASSERT_EQ( v1.getSize(), v2.getSize(), "The vector sizes must be the same." );

		   using IndexType = typename Vector1::IndexType;

		   const auto* data1 = v1.getData();
		   const auto* data2 = v2.getData();
		   // Explicit return type, consistent with getVectorMax and getVectorSum.
		   auto fetch = [=] __cuda_callable__ ( IndexType i ) -> ResultType { return data1[ i ] - data2[ i ]; };
		   auto reduction = [=] __cuda_callable__ ( ResultType& a, const ResultType& b ) { a = TNL::max( a, b ); };
		   auto volatileReduction = [=] __cuda_callable__ ( volatile ResultType& a, volatile ResultType& b ) { a = TNL::max( a, b ); };
		   return Reduction< DeviceType >::reduce( v1.getSize(), reduction, volatileReduction, fetch, std::numeric_limits< ResultType >::lowest() );
		}

		/**
		 * Computes the minimum of the element-wise differences v1[ i ] - v2[ i ].
		 *
		 * The differences are combined with TNL::min, starting from
		 * numeric_limits< ResultType >::max(). Both vectors must have the same
		 * positive size (checked by assertions).
		 */
		template< typename Device >
		   template< typename Vector1, typename Vector2, typename ResultType >
		ResultType
		CommonVectorOperations< Device >::
		getVectorDifferenceMin( const Vector1& v1,
		                        const Vector2& v2 )
		{
		   TNL_ASSERT_GT( v1.getSize(), 0, "Vector size must be positive." );
		   TNL_ASSERT_EQ( v1.getSize(), v2.getSize(), "The vector sizes must be the same." );

		   using IndexType = typename Vector1::IndexType;

		   const auto* data1 = v1.getData();
		   const auto* data2 = v2.getData();
		   // Explicit return type, consistent with getVectorMax and getVectorSum.
		   auto fetch = [=] __cuda_callable__ ( IndexType i ) -> ResultType { return data1[ i ] - data2[ i ]; };
		   auto reduction = [=] __cuda_callable__ ( ResultType& a, const ResultType& b ) { a = TNL::min( a, b ); };
		   auto volatileReduction = [=] __cuda_callable__ ( volatile ResultType& a, volatile ResultType& b ) { a = TNL::min( a, b ); };
		   return Reduction< DeviceType >::reduce( v1.getSize(), reduction, volatileReduction, fetch, std::numeric_limits< ResultType >::max() );
		}

		/**
		 * Computes the maximum of TNL::abs( v1[ i ] - v2[ i ] ).
		 *
		 * The values are combined with TNL::max, starting from
		 * numeric_limits< ResultType >::lowest(). Both vectors must have the same
		 * positive size (checked by assertions).
		 */
		template< typename Device >
		   template< typename Vector1, typename Vector2, typename ResultType >
		ResultType
		CommonVectorOperations< Device >::
		getVectorDifferenceAbsMax( const Vector1& v1,
		                           const Vector2& v2 )
		{
		   TNL_ASSERT_GT( v1.getSize(), 0, "Vector size must be positive." );
		   TNL_ASSERT_EQ( v1.getSize(), v2.getSize(), "The vector sizes must be the same." );

		   using IndexType = typename Vector1::IndexType;

		   const auto* data1 = v1.getData();
		   const auto* data2 = v2.getData();
		   // Explicit return type, consistent with getVectorMax and getVectorSum.
		   auto fetch = [=] __cuda_callable__ ( IndexType i ) -> ResultType { return TNL::abs( data1[ i ] - data2[ i ] ); };
		   auto reduction = [=] __cuda_callable__ ( ResultType& a, const ResultType& b ) { a = TNL::max( a, b ); };
		   auto volatileReduction = [=] __cuda_callable__ ( volatile ResultType& a, volatile ResultType& b ) { a = TNL::max( a, b ); };
		   return Reduction< DeviceType >::reduce( v1.getSize(), reduction, volatileReduction, fetch, std::numeric_limits< ResultType >::lowest() );
		}

		/**
		 * Computes the minimum of TNL::abs( v1[ i ] - v2[ i ] ).
		 *
		 * The values are combined with TNL::min, starting from
		 * numeric_limits< ResultType >::max(). Both vectors must have the same
		 * positive size (checked by assertions).
		 */
		template< typename Device >
		   template< typename Vector1, typename Vector2, typename ResultType >
		ResultType
		CommonVectorOperations< Device >::
		getVectorDifferenceAbsMin( const Vector1& v1,
		                           const Vector2& v2 )
		{
		   TNL_ASSERT_GT( v1.getSize(), 0, "Vector size must be positive." );
		   TNL_ASSERT_EQ( v1.getSize(), v2.getSize(), "The vector sizes must be the same." );

		   using IndexType = typename Vector1::IndexType;

		   const auto* data1 = v1.getData();
		   const auto* data2 = v2.getData();
		   // Explicit return type, consistent with getVectorMax and getVectorSum.
		   auto fetch = [=] __cuda_callable__ ( IndexType i ) -> ResultType { return TNL::abs( data1[ i ] - data2[ i ] ); };
		   auto reduction = [=] __cuda_callable__ ( ResultType& a, const ResultType& b ) { a = TNL::min( a, b ); };
		   auto volatileReduction = [=] __cuda_callable__ ( volatile ResultType& a, volatile ResultType& b ) { a = TNL::min( a, b ); };
		   return Reduction< DeviceType >::reduce( v1.getSize(), reduction, volatileReduction, fetch, std::numeric_limits< ResultType >::max() );
		}

		/**
		 * Computes the L1 norm of the difference: the sum of TNL::abs( v1[ i ] - v2[ i ] ).
		 *
		 * The summation starts from zero converted to ResultType. Both vectors must
		 * have the same positive size (checked by assertions).
		 */
		template< typename Device >
		   template< typename Vector1, typename Vector2, typename ResultType >
		ResultType
		CommonVectorOperations< Device >::
		getVectorDifferenceL1Norm( const Vector1& v1,
		                           const Vector2& v2 )
		{
		   TNL_ASSERT_GT( v1.getSize(), 0, "Vector size must be positive." );
		   TNL_ASSERT_EQ( v1.getSize(), v2.getSize(), "The vector sizes must be the same." );

		   using IndexType = typename Vector1::IndexType;

		   const auto* data1 = v1.getData();
		   const auto* data2 = v2.getData();
		   // Explicit return type, consistent with getVectorMax and getVectorSum.
		   auto fetch = [=] __cuda_callable__ ( IndexType i ) -> ResultType { return TNL::abs( data1[ i ] - data2[ i ] ); };
		   auto reduction = [=] __cuda_callable__ ( ResultType& a, const ResultType& b ) { a += b; };
		   auto volatileReduction = [=] __cuda_callable__ ( volatile ResultType& a, volatile ResultType& b ) { a += b; };
		   return Reduction< DeviceType >::reduce( v1.getSize(), reduction, volatileReduction, fetch, static_cast< ResultType >( 0 ) );
		}

		/**
		 * Computes the L2 norm of the difference: the square root of the sum of
		 * ( v1[ i ] - v2[ i ] )^2.
		 *
		 * The squared difference is evaluated in the type of the subtraction and
		 * then converted to ResultType. Both vectors must have the same positive
		 * size (checked by assertions).
		 */
		template< typename Device >
		   template< typename Vector1, typename Vector2, typename ResultType >
		ResultType
		CommonVectorOperations< Device >::
		getVectorDifferenceL2Norm( const Vector1& v1,
		                           const Vector2& v2 )
		{
		   TNL_ASSERT_GT( v1.getSize(), 0, "Vector size must be positive." );
		   TNL_ASSERT_EQ( v1.getSize(), v2.getSize(), "The vector sizes must be the same." );

		   using IndexType = typename Vector1::IndexType;

		   const auto* data1 = v1.getData();
		   const auto* data2 = v2.getData();
		   // Explicit return type, consistent with getVectorMax and getVectorSum.
		   auto fetch = [=] __cuda_callable__ ( IndexType i ) -> ResultType {
		      // Compute the difference once and square it.
		      auto diff = data1[ i ] - data2[ i ];
		      return diff * diff;
		   };
		   auto reduction = [=] __cuda_callable__ ( ResultType& a, const ResultType& b ) { a += b; };
		   auto volatileReduction = [=] __cuda_callable__ ( volatile ResultType& a, volatile ResultType& b ) { a += b; };
		   return std::sqrt( Reduction< DeviceType >::reduce( v1.getSize(), reduction, volatileReduction, fetch, static_cast< ResultType >( 0 ) ) );
		}

		/**
		 * Computes the Lp norm of the difference:
		 * ( sum of TNL::abs( v1[ i ] - v2[ i ] )^p )^( 1 / p ).
		 *
		 * The cases p == 1 and p == 2 are forwarded to getVectorDifferenceL1Norm and
		 * getVectorDifferenceL2Norm, which avoid the calls to pow.
		 * Both vectors must have the same positive size and p must be at least 1
		 * (checked by assertions).
		 */
		template< typename Device >
		   template< typename Vector1, typename Vector2, typename ResultType, typename Scalar >
		ResultType
		CommonVectorOperations< Device >::
		getVectorDifferenceLpNorm( const Vector1& v1,
		                           const Vector2& v2,
		                           const Scalar p )
		{
		   TNL_ASSERT_GT( v1.getSize(), 0, "Vector size must be positive." );
		   TNL_ASSERT_EQ( v1.getSize(), v2.getSize(), "The vector sizes must be the same." );
		   TNL_ASSERT_GE( p, 1.0, "Parameter of the L^p norm must be at least 1.0." );

		   // Exact comparison is intended: only the literal values 1 and 2 take the shortcuts.
		   if( p == 1.0 )
		      return getVectorDifferenceL1Norm< Vector1, Vector2, ResultType >( v1, v2 );
		   if( p == 2.0 )
		      return getVectorDifferenceL2Norm< Vector1, Vector2, ResultType >( v1, v2 );

		   using IndexType = typename Vector1::IndexType;

		   const auto* data1 = v1.getData();
		   const auto* data2 = v2.getData();
		   // Explicit return type, consistent with getVectorMax and getVectorSum.
		   auto fetch = [=] __cuda_callable__ ( IndexType i ) -> ResultType { return TNL::pow( TNL::abs( data1[ i ] - data2[ i ] ), p ); };
		   auto reduction = [=] __cuda_callable__ ( ResultType& a, const ResultType& b ) { a += b; };
		   auto volatileReduction = [=] __cuda_callable__ ( volatile ResultType& a, volatile ResultType& b ) { a += b; };
		   return std::pow( Reduction< DeviceType >::reduce( v1.getSize(), reduction, volatileReduction, fetch, static_cast< ResultType >( 0 ) ), 1.0 / p );
		}

		/**
		 * Computes the sum of the element-wise differences v1[ i ] - v2[ i ].
		 *
		 * The summation starts from zero converted to ResultType. Both vectors must
		 * have the same positive size (checked by assertions).
		 */
		template< typename Device >
		   template< typename Vector1, typename Vector2, typename ResultType >
		ResultType
		CommonVectorOperations< Device >::
		getVectorDifferenceSum( const Vector1& v1,
		                        const Vector2& v2 )
		{
		   TNL_ASSERT_GT( v1.getSize(), 0, "Vector size must be positive." );
		   TNL_ASSERT_EQ( v1.getSize(), v2.getSize(), "The vector sizes must be the same." );

		   using IndexType = typename Vector1::IndexType;

		   const auto* data1 = v1.getData();
		   const auto* data2 = v2.getData();
		   // Explicit return type, consistent with getVectorMax and getVectorSum.
		   auto fetch = [=] __cuda_callable__ ( IndexType i ) -> ResultType { return data1[ i ] - data2[ i ]; };
		   auto reduction = [=] __cuda_callable__ ( ResultType& a, const ResultType& b ) { a += b; };
		   auto volatileReduction = [=] __cuda_callable__ ( volatile ResultType& a, volatile ResultType& b ) { a += b; };
		   return Reduction< DeviceType >::reduce( v1.getSize(), reduction, volatileReduction, fetch, static_cast< ResultType >( 0 ) );
		}

		/**
		 * Computes the scalar product of v1 and v2: the sum of v1[ i ] * v2[ i ].
		 *
		 * The summation starts from zero converted to ResultType. Both vectors must
		 * have the same positive size (checked by assertions).
		 */
		template< typename Device >
		   template< typename Vector1, typename Vector2, typename ResultType >
		ResultType
		CommonVectorOperations< Device >::
		getScalarProduct( const Vector1& v1,
		                  const Vector2& v2 )
		{
		   TNL_ASSERT_GT( v1.getSize(), 0, "Vector size must be positive." );
		   TNL_ASSERT_EQ( v1.getSize(), v2.getSize(), "The vector sizes must be the same." );

		   using IndexType = typename Vector1::IndexType;

		   const auto* data1 = v1.getData();
		   const auto* data2 = v2.getData();
		   // Explicit return type, consistent with getVectorMax and getVectorSum.
		   auto fetch = [=] __cuda_callable__ ( IndexType i ) -> ResultType { return data1[ i ] * data2[ i ]; };
		   auto reduction = [=] __cuda_callable__ ( ResultType& a, const ResultType& b ) { a += b; };
		   auto volatileReduction = [=] __cuda_callable__ ( volatile ResultType& a, volatile ResultType& b ) { a += b; };
		   return Reduction< DeviceType >::reduce( v1.getSize(), reduction, volatileReduction, fetch, static_cast< ResultType >( 0 ) );
		}

		} // namespace Algorithms
		} // namespace Containers
		} // namespace TNL

src/TNL/Containers/Algorithms/CudaReductionKernel.h

+188 −122

File changed.

Preview size limit exceeded, changes collapsed.