Rewriting lambdas with references. (4b52cc2b) · Commits · TNL / tnl-dev

src/TNL/Containers/Algorithms/ArrayOperationsCuda.hpp

+12 −17

Original line number	Diff line number	Diff line
		@@ -205,12 +205,10 @@ compareMemory( const Element1* destination,
		TNL_ASSERT_TRUE( destination, "Attempted to compare data through a nullptr." );
		TNL_ASSERT_TRUE( source, "Attempted to compare data through a nullptr." );

		auto fetch = [=] __cuda_callable__ ( Index i ) { return ( destination[ i ] == source[ i ] ); };
		auto reduction = [=] __cuda_callable__ ( const bool a, const bool b ) { return a && b; };
		return Reduction< Devices::Cuda >::reduce( size, reduction, fetch, true );

		/*Algorithms::ParallelReductionEqualities< Element1, Element2 > reductionEqualities;
		return Reduction< Devices::Cuda >::reduce( reductionEqualities, size, destination, source );*/
		auto fetch = [=] __cuda_callable__ ( Index i ) -> bool { return ( destination[ i ] == source[ i ] ); };
		auto reduction = [=] __cuda_callable__ ( bool& a, const bool& b ) { a &= b; };
		auto volatileReduction = [=] __cuda_callable__ ( volatile bool& a, volatile bool& b ) { a &= b; };
		//return Reduction< Devices::Cuda >::reduce( size, reduction, volatileReduction, fetch, true );
		}

		template< typename Element,
		@@ -225,9 +223,10 @@ containsValue( const Element* data,
		TNL_ASSERT_GE( size, 0, "" );

		if( size == 0 ) return false;
		auto fetch = [=] __cuda_callable__ ( Index i ) { return ( data[ i ] == value ); };
		auto reduction = [=] __cuda_callable__ ( const bool a, const bool b ) { return a \|\| b; };
		return Reduction< Devices::Cuda >::reduce( size, reduction, fetch, false );
		auto fetch = [=] __cuda_callable__ ( Index i ) -> bool { return ( data[ i ] == value ); };
		auto reduction = [=] __cuda_callable__ ( bool& a, const bool& b ) { a \|= b; };
		auto volatileReduction = [=] __cuda_callable__ ( volatile bool& a, volatile bool& b ) { a \|= b; };
		//return Reduction< Devices::Cuda >::reduce( size, reduction, volatileReduction, fetch, false );
		}

		template< typename Element,
		@@ -243,14 +242,10 @@ containsOnlyValue( const Element* data,
		if( size == 0 ) return false;

		if( size == 0 ) return false;
		auto fetch = [=] __cuda_callable__ ( Index i ) { return ( data[ i ] == value ); };
		auto reduction = [=] __cuda_callable__ ( const bool a, const bool b ) { return a && b; };
		return Reduction< Devices::Cuda >::reduce( size, reduction, fetch, true );


		/*Algorithms::ParallelReductionContainsOnlyValue< Element > reductionContainsOnlyValue;
		reductionContainsOnlyValue.setValue( value );
		return Reduction< Devices::Cuda >::reduce( reductionContainsOnlyValue, size, data, nullptr );*/
		auto fetch = [=] __cuda_callable__ ( Index i ) -> bool { return ( data[ i ] == value ); };
		auto reduction = [=] __cuda_callable__ ( bool& a, const bool& b ) { a &= b; };
		auto volatileReduction = [=] __cuda_callable__ ( volatile bool& a, volatile bool& b ) { a &= b; };
		//return Reduction< Devices::Cuda >::reduce( size, reduction, volatileReduction, fetch, true );
		}

src/TNL/Containers/Algorithms/CommonVectorOperations.hpp

0 → 100644

+375 −0

Original line number	Diff line number	Diff line
		/***************************************************************************
		CommonVectorOperations.hpp - description
		-------------------
		begin : Apr 12, 2019
		copyright : (C) 2019 by Tomas Oberhuber
		email : tomas.oberhuber@fjfi.cvut.cz
		***************************************************************************/

		/* See Copyright Notice in tnl/Copyright */

		#pragma once

		#include <TNL/Containers/Algorithms/CommonVectorOperations.h>
		#include <TNL/Containers/Algorithms/Reduction.h>

		namespace TNL {
		namespace Containers {
		namespace Algorithms {

		/**
		 * Returns the largest element of the vector, converted to ResultType.
		 *
		 * The vector must be non-empty (checked by TNL_ASSERT_GT).
		 *
		 * The reduction is seeded with std::numeric_limits< ResultType >::lowest().
		 * The previous seed, -max(), equals lowest() only for floating-point types:
		 * for signed integers it is one above the true minimum and for unsigned
		 * integers it wraps around to 1, so e.g. an all-zero unsigned vector
		 * reported a maximum of 1.
		 */
		template< typename Device >
		template< typename Vector, typename ResultType >
		ResultType
		CommonVectorOperations< Device >::
		getVectorMax( const Vector& v )
		{
		   TNL_ASSERT_GT( v.getSize(), 0, "Vector size must be positive." );

		   using IndexType = typename Vector::IndexType;

		   const auto* data = v.getData();
		   // Each element is converted to ResultType as it is fetched.
		   auto fetch = [=] __cuda_callable__ ( IndexType i ) -> ResultType { return data[ i ]; };
		   // Both functors update their first argument in place with the larger value.
		   auto reduction = [=] __cuda_callable__ ( ResultType& a, const ResultType& b ) { a = TNL::max( a, b ); };
		   auto volatileReduction = [=] __cuda_callable__ ( volatile ResultType& a, volatile ResultType& b ) { a = TNL::max( a, b ); };
		   // lowest() is the identity of max for every arithmetic ResultType.
		   return Reduction< DeviceType >::reduce( v.getSize(), reduction, volatileReduction, fetch, std::numeric_limits< ResultType >::lowest() );
		}

		/**
		 * Computes the minimum over all elements of the vector.
		 *
		 * Asserts that the vector is non-empty and then reduces its elements with
		 * a min-combining functor, seeded with std::numeric_limits< ResultType >::max().
		 */
		template< typename Device >
		template< typename Vector, typename ResultType >
		ResultType
		CommonVectorOperations< Device >::
		getVectorMin( const Vector& v )
		{
		   using RealType = typename Vector::RealType;   // not referenced below
		   using IndexType = typename Vector::IndexType;

		   TNL_ASSERT_GT( v.getSize(), 0, "Vector size must be positive." );

		   const auto* elements = v.getData();

		   // Reads one element; the return type is deduced from the element type.
		   auto loadElement = [=] __cuda_callable__ ( IndexType idx ) { return elements[ idx ]; };

		   // Keep the smaller of the accumulator and the incoming value.
		   auto keepSmaller = [=] __cuda_callable__ ( ResultType& acc, const ResultType& x ) { acc = TNL::min( acc, x ); };
		   auto keepSmallerVolatile = [=] __cuda_callable__ ( volatile ResultType& acc, volatile ResultType& x ) { acc = TNL::min( acc, x ); };

		   return Reduction< DeviceType >::reduce( v.getSize(),
		                                           keepSmaller,
		                                           keepSmallerVolatile,
		                                           loadElement,
		                                           std::numeric_limits< ResultType >::max() );
		}

		/**
		 * Returns the largest absolute value among the vector elements.
		 *
		 * The vector must be non-empty (checked by TNL_ASSERT_GT).
		 *
		 * The reduction is seeded with std::numeric_limits< ResultType >::lowest().
		 * The previous seed, -max(), wraps around to 1 for unsigned result types
		 * and is one above the minimum for signed integers; lowest() is correct
		 * for every arithmetic ResultType and identical to -max() for
		 * floating-point types.
		 */
		template< typename Device >
		template< typename Vector, typename ResultType >
		ResultType
		CommonVectorOperations< Device >::
		getVectorAbsMax( const Vector& v )
		{
		   TNL_ASSERT_GT( v.getSize(), 0, "Vector size must be positive." );

		   using IndexType = typename Vector::IndexType;

		   const auto* data = v.getData();
		   // The absolute value is taken on the element, then converted to ResultType
		   // (explicit return type for consistency with getVectorMax).
		   auto fetch = [=] __cuda_callable__ ( IndexType i ) -> ResultType { return TNL::abs( data[ i ] ); };
		   // Both functors update their first argument in place with the larger value.
		   auto reduction = [=] __cuda_callable__ ( ResultType& a, const ResultType& b ) { a = TNL::max( a, b ); };
		   auto volatileReduction = [=] __cuda_callable__ ( volatile ResultType& a, volatile ResultType& b ) { a = TNL::max( a, b ); };
		   return Reduction< DeviceType >::reduce( v.getSize(), reduction, volatileReduction, fetch, std::numeric_limits< ResultType >::lowest() );
		}

		/**
		 * Computes the smallest absolute value among the vector elements.
		 *
		 * Asserts that the vector is non-empty and then reduces TNL::abs of each
		 * element with a min-combining functor, seeded with
		 * std::numeric_limits< ResultType >::max().
		 */
		template< typename Device >
		template< typename Vector, typename ResultType >
		ResultType
		CommonVectorOperations< Device >::
		getVectorAbsMin( const Vector& v )
		{
		   using RealType = typename Vector::RealType;   // not referenced below
		   using IndexType = typename Vector::IndexType;

		   TNL_ASSERT_GT( v.getSize(), 0, "Vector size must be positive." );

		   const auto* elements = v.getData();

		   // Magnitude of one element; the return type is deduced.
		   auto loadMagnitude = [=] __cuda_callable__ ( IndexType idx ) { return TNL::abs( elements[ idx ] ); };

		   // Keep the smaller of the accumulator and the incoming value.
		   auto keepSmaller = [=] __cuda_callable__ ( ResultType& acc, const ResultType& x ) { acc = TNL::min( acc, x ); };
		   auto keepSmallerVolatile = [=] __cuda_callable__ ( volatile ResultType& acc, volatile ResultType& x ) { acc = TNL::min( acc, x ); };

		   return Reduction< DeviceType >::reduce( v.getSize(),
		                                           keepSmaller,
		                                           keepSmallerVolatile,
		                                           loadMagnitude,
		                                           std::numeric_limits< ResultType >::max() );
		}

		/**
		 * Computes the L1 norm of the vector, i.e. the sum of TNL::abs of its elements.
		 *
		 * Asserts that the vector is non-empty; the sum starts from ( ResultType ) 0.
		 */
		template< typename Device >
		template< typename Vector, typename ResultType >
		ResultType
		CommonVectorOperations< Device >::
		getVectorL1Norm( const Vector& v )
		{
		   using RealType = typename Vector::RealType;   // not referenced below
		   using IndexType = typename Vector::IndexType;

		   TNL_ASSERT_GT( v.getSize(), 0, "Vector size must be positive." );

		   const auto* elements = v.getData();

		   // Magnitude of one element; the return type is deduced.
		   auto loadMagnitude = [=] __cuda_callable__ ( IndexType idx ) { return TNL::abs( elements[ idx ] ); };

		   // Add the incoming value to the accumulator in place.
		   auto accumulate = [=] __cuda_callable__ ( ResultType& acc, const ResultType& x ) { acc += x; };
		   auto accumulateVolatile = [=] __cuda_callable__ ( volatile ResultType& acc, volatile ResultType& x ) { acc += x; };

		   return Reduction< DeviceType >::reduce( v.getSize(),
		                                           accumulate,
		                                           accumulateVolatile,
		                                           loadMagnitude,
		                                           ( ResultType ) 0 );
		}

		/**
		 * Computes the L2 (Euclidean) norm of the vector: the square root of the
		 * sum of squared elements.
		 *
		 * Asserts that the vector is non-empty. Each square is computed in the
		 * element type before it enters the ResultType accumulation.
		 */
		template< typename Device >
		template< typename Vector, typename ResultType >
		ResultType
		CommonVectorOperations< Device >::
		getVectorL2Norm( const Vector& v )
		{
		   using RealType = typename Vector::RealType;   // not referenced below
		   using IndexType = typename Vector::IndexType;

		   TNL_ASSERT_GT( v.getSize(), 0, "Vector size must be positive." );

		   const auto* elements = v.getData();

		   // Square of one element; the return type is deduced.
		   auto loadSquare = [=] __cuda_callable__ ( IndexType idx ) { return elements[ idx ] * elements[ idx ]; };

		   // Add the incoming value to the accumulator in place.
		   auto accumulate = [=] __cuda_callable__ ( ResultType& acc, const ResultType& x ) { acc += x; };
		   auto accumulateVolatile = [=] __cuda_callable__ ( volatile ResultType& acc, volatile ResultType& x ) { acc += x; };

		   return std::sqrt( Reduction< DeviceType >::reduce( v.getSize(),
		                                                      accumulate,
		                                                      accumulateVolatile,
		                                                      loadSquare,
		                                                      ( ResultType ) 0 ) );
		}

		/**
		 * Computes the L^p norm of the vector: ( sum_i |v_i|^p )^( 1 / p ).
		 *
		 * Asserts that the vector is non-empty and that p >= 1.0. The cases
		 * p == 1.0 and p == 2.0 are forwarded to getVectorL1Norm and
		 * getVectorL2Norm respectively.
		 */
		template< typename Device >
		template< typename Vector, typename ResultType, typename Scalar >
		ResultType
		CommonVectorOperations< Device >::
		getVectorLpNorm( const Vector& v,
		                 const Scalar p )
		{
		   TNL_ASSERT_GT( v.getSize(), 0, "Vector size must be positive." );
		   TNL_ASSERT_GE( p, 1.0, "Parameter of the L^p norm must be at least 1.0." );

		   using RealType = typename Vector::RealType;   // not referenced below
		   using IndexType = typename Vector::IndexType;

		   // Dedicated implementations exist for the two most common exponents.
		   if( p == 1.0 )
		   {
		      return getVectorL1Norm< Vector, ResultType >( v );
		   }
		   if( p == 2.0 )
		   {
		      return getVectorL2Norm< Vector, ResultType >( v );
		   }

		   const auto* elements = v.getData();

		   // |element|^p; p is captured by value.
		   auto loadPower = [=] __cuda_callable__ ( IndexType idx ) { return TNL::pow( TNL::abs( elements[ idx ] ), p ); };

		   // Add the incoming value to the accumulator in place.
		   auto accumulate = [=] __cuda_callable__ ( ResultType& acc, const ResultType& x ) { acc += x; };
		   auto accumulateVolatile = [=] __cuda_callable__ ( volatile ResultType& acc, volatile ResultType& x ) { acc += x; };

		   return std::pow( Reduction< DeviceType >::reduce( v.getSize(),
		                                                     accumulate,
		                                                     accumulateVolatile,
		                                                     loadPower,
		                                                     ( ResultType ) 0 ),
		                    1.0 / p );
		}

		/**
		 * Computes the sum of all vector elements in ResultType.
		 *
		 * Asserts that the vector is non-empty. When ResultType is bool the
		 * function calls abort() at run time instead of computing anything.
		 */
		template< typename Device >
		template< typename Vector, typename ResultType >
		ResultType
		CommonVectorOperations< Device >::
		getVectorSum( const Vector& v )
		{
		   TNL_ASSERT_GT( v.getSize(), 0, "Vector size must be positive." );

		   // A bool result type is rejected at run time.
		   if( std::is_same< ResultType, bool >::value )
		   {
		      abort();
		   }

		   using RealType = typename Vector::RealType;   // not referenced below
		   using IndexType = typename Vector::IndexType;

		   const auto* elements = v.getData();

		   // Each element is converted to ResultType as it is fetched.
		   auto loadElement = [=] __cuda_callable__ ( IndexType idx ) -> ResultType { return elements[ idx ]; };

		   // Add the incoming value to the accumulator in place.
		   auto accumulate = [=] __cuda_callable__ ( ResultType& acc, const ResultType& x ) { acc += x; };
		   auto accumulateVolatile = [=] __cuda_callable__ ( volatile ResultType& acc, volatile ResultType& x ) { acc += x; };

		   return Reduction< DeviceType >::reduce( v.getSize(),
		                                           accumulate,
		                                           accumulateVolatile,
		                                           loadElement,
		                                           ( ResultType ) 0 );
		}

		/**
		 * Returns the largest element-wise difference v1[ i ] - v2[ i ].
		 *
		 * Both vectors must be non-empty and of equal size (checked by assertions).
		 * The subtraction is carried out on the element types before the result
		 * is converted to ResultType.
		 *
		 * The reduction is seeded with std::numeric_limits< ResultType >::lowest().
		 * The previous seed, -max(), is one above the true minimum for signed
		 * integers and wraps around to 1 for unsigned ones; lowest() is correct
		 * for every arithmetic ResultType and identical to -max() for
		 * floating-point types.
		 */
		template< typename Device >
		template< typename Vector1, typename Vector2, typename ResultType >
		ResultType
		CommonVectorOperations< Device >::
		getVectorDifferenceMax( const Vector1& v1,
		                        const Vector2& v2 )
		{
		   TNL_ASSERT_GT( v1.getSize(), 0, "Vector size must be positive." );
		   TNL_ASSERT_EQ( v1.getSize(), v2.getSize(), "The vector sizes must be the same." );

		   using IndexType = typename Vector1::IndexType;

		   const auto* data1 = v1.getData();
		   const auto* data2 = v2.getData();
		   // Explicit return type for consistency with getVectorMax.
		   auto fetch = [=] __cuda_callable__ ( IndexType i ) -> ResultType { return data1[ i ] - data2[ i ]; };
		   // Both functors update their first argument in place with the larger value.
		   auto reduction = [=] __cuda_callable__ ( ResultType& a, const ResultType& b ) { a = TNL::max( a, b ); };
		   auto volatileReduction = [=] __cuda_callable__ ( volatile ResultType& a, volatile ResultType& b ) { a = TNL::max( a, b ); };
		   return Reduction< DeviceType >::reduce( v1.getSize(), reduction, volatileReduction, fetch, std::numeric_limits< ResultType >::lowest() );
		}

		/**
		 * Computes the smallest element-wise difference v1[ i ] - v2[ i ].
		 *
		 * Asserts that v1 is non-empty and that both vectors have the same size.
		 * The reduction is seeded with std::numeric_limits< ResultType >::max().
		 */
		template< typename Device >
		template< typename Vector1, typename Vector2, typename ResultType >
		ResultType
		CommonVectorOperations< Device >::
		getVectorDifferenceMin( const Vector1& v1,
		                        const Vector2& v2 )
		{
		   using RealType = typename Vector1::RealType;   // not referenced below
		   using IndexType = typename Vector1::IndexType;

		   TNL_ASSERT_GT( v1.getSize(), 0, "Vector size must be positive." );
		   TNL_ASSERT_EQ( v1.getSize(), v2.getSize(), "The vector sizes must be the same." );

		   const auto* lhs = v1.getData();
		   const auto* rhs = v2.getData();

		   // Difference of one element pair; the return type is deduced.
		   auto loadDifference = [=] __cuda_callable__ ( IndexType idx ) { return lhs[ idx ] - rhs[ idx ]; };

		   // Keep the smaller of the accumulator and the incoming value.
		   auto keepSmaller = [=] __cuda_callable__ ( ResultType& acc, const ResultType& x ) { acc = TNL::min( acc, x ); };
		   auto keepSmallerVolatile = [=] __cuda_callable__ ( volatile ResultType& acc, volatile ResultType& x ) { acc = TNL::min( acc, x ); };

		   return Reduction< DeviceType >::reduce( v1.getSize(),
		                                           keepSmaller,
		                                           keepSmallerVolatile,
		                                           loadDifference,
		                                           std::numeric_limits< ResultType >::max() );
		}

		/**
		 * Returns the largest absolute element-wise difference | v1[ i ] - v2[ i ] |.
		 *
		 * Both vectors must be non-empty and of equal size (checked by assertions).
		 * The subtraction and TNL::abs are applied on the element types before the
		 * result is converted to ResultType.
		 *
		 * The reduction is seeded with std::numeric_limits< ResultType >::lowest().
		 * The previous seed, -max(), wraps around to 1 for unsigned result types
		 * (so two identical vectors reported a maximum difference of 1) and is one
		 * above the minimum for signed integers; lowest() equals -max() for
		 * floating-point types.
		 */
		template< typename Device >
		template< typename Vector1, typename Vector2, typename ResultType >
		ResultType
		CommonVectorOperations< Device >::
		getVectorDifferenceAbsMax( const Vector1& v1,
		                           const Vector2& v2 )
		{
		   TNL_ASSERT_GT( v1.getSize(), 0, "Vector size must be positive." );
		   TNL_ASSERT_EQ( v1.getSize(), v2.getSize(), "The vector sizes must be the same." );

		   using IndexType = typename Vector1::IndexType;

		   const auto* data1 = v1.getData();
		   const auto* data2 = v2.getData();
		   // Explicit return type for consistency with getVectorMax.
		   auto fetch = [=] __cuda_callable__ ( IndexType i ) -> ResultType { return TNL::abs( data1[ i ] - data2[ i ] ); };
		   // Both functors update their first argument in place with the larger value.
		   auto reduction = [=] __cuda_callable__ ( ResultType& a, const ResultType& b ) { a = TNL::max( a, b ); };
		   auto volatileReduction = [=] __cuda_callable__ ( volatile ResultType& a, volatile ResultType& b ) { a = TNL::max( a, b ); };
		   return Reduction< DeviceType >::reduce( v1.getSize(), reduction, volatileReduction, fetch, std::numeric_limits< ResultType >::lowest() );
		}

		/**
		 * Computes the smallest absolute element-wise difference | v1[ i ] - v2[ i ] |.
		 *
		 * Asserts that v1 is non-empty and that both vectors have the same size.
		 * The reduction is seeded with std::numeric_limits< ResultType >::max().
		 */
		template< typename Device >
		template< typename Vector1, typename Vector2, typename ResultType >
		ResultType
		CommonVectorOperations< Device >::
		getVectorDifferenceAbsMin( const Vector1& v1,
		                           const Vector2& v2 )
		{
		   using RealType = typename Vector1::RealType;   // not referenced below
		   using IndexType = typename Vector1::IndexType;

		   TNL_ASSERT_GT( v1.getSize(), 0, "Vector size must be positive." );
		   TNL_ASSERT_EQ( v1.getSize(), v2.getSize(), "The vector sizes must be the same." );

		   const auto* lhs = v1.getData();
		   const auto* rhs = v2.getData();

		   // Magnitude of the difference of one element pair; the return type is deduced.
		   auto loadAbsDifference = [=] __cuda_callable__ ( IndexType idx ) { return TNL::abs( lhs[ idx ] - rhs[ idx ] ); };

		   // Keep the smaller of the accumulator and the incoming value.
		   auto keepSmaller = [=] __cuda_callable__ ( ResultType& acc, const ResultType& x ) { acc = TNL::min( acc, x ); };
		   auto keepSmallerVolatile = [=] __cuda_callable__ ( volatile ResultType& acc, volatile ResultType& x ) { acc = TNL::min( acc, x ); };

		   return Reduction< DeviceType >::reduce( v1.getSize(),
		                                           keepSmaller,
		                                           keepSmallerVolatile,
		                                           loadAbsDifference,
		                                           std::numeric_limits< ResultType >::max() );
		}

		/**
		 * Computes the L1 norm of v1 - v2, i.e. the sum of | v1[ i ] - v2[ i ] |.
		 *
		 * Asserts that v1 is non-empty and that both vectors have the same size.
		 * The sum starts from ( ResultType ) 0.
		 */
		template< typename Device >
		template< typename Vector1, typename Vector2, typename ResultType >
		ResultType
		CommonVectorOperations< Device >::
		getVectorDifferenceL1Norm( const Vector1& v1,
		                           const Vector2& v2 )
		{
		   using RealType = typename Vector1::RealType;   // not referenced below
		   using IndexType = typename Vector1::IndexType;

		   TNL_ASSERT_GT( v1.getSize(), 0, "Vector size must be positive." );
		   TNL_ASSERT_EQ( v1.getSize(), v2.getSize(), "The vector sizes must be the same." );

		   const auto* lhs = v1.getData();
		   const auto* rhs = v2.getData();

		   // Magnitude of the difference of one element pair; the return type is deduced.
		   auto loadAbsDifference = [=] __cuda_callable__ ( IndexType idx ) { return TNL::abs( lhs[ idx ] - rhs[ idx ] ); };

		   // Add the incoming value to the accumulator in place.
		   auto accumulate = [=] __cuda_callable__ ( ResultType& acc, const ResultType& x ) { acc += x; };
		   auto accumulateVolatile = [=] __cuda_callable__ ( volatile ResultType& acc, volatile ResultType& x ) { acc += x; };

		   return Reduction< DeviceType >::reduce( v1.getSize(),
		                                           accumulate,
		                                           accumulateVolatile,
		                                           loadAbsDifference,
		                                           ( ResultType ) 0 );
		}

		/**
		 * Computes the L2 (Euclidean) norm of v1 - v2: the square root of the sum
		 * of squared element-wise differences.
		 *
		 * Asserts that v1 is non-empty and that both vectors have the same size.
		 */
		template< typename Device >
		template< typename Vector1, typename Vector2, typename ResultType >
		ResultType
		CommonVectorOperations< Device >::
		getVectorDifferenceL2Norm( const Vector1& v1,
		                           const Vector2& v2 )
		{
		   using RealType = typename Vector1::RealType;   // not referenced below
		   using IndexType = typename Vector1::IndexType;

		   TNL_ASSERT_GT( v1.getSize(), 0, "Vector size must be positive." );
		   TNL_ASSERT_EQ( v1.getSize(), v2.getSize(), "The vector sizes must be the same." );

		   const auto* lhs = v1.getData();
		   const auto* rhs = v2.getData();

		   // Squared difference of one element pair; the return type is deduced.
		   auto loadSquaredDifference = [=] __cuda_callable__ ( IndexType idx ) {
		      auto delta = lhs[ idx ] - rhs[ idx ];
		      return delta * delta;
		   };

		   // Add the incoming value to the accumulator in place.
		   auto accumulate = [=] __cuda_callable__ ( ResultType& acc, const ResultType& x ) { acc += x; };
		   auto accumulateVolatile = [=] __cuda_callable__ ( volatile ResultType& acc, volatile ResultType& x ) { acc += x; };

		   return std::sqrt( Reduction< DeviceType >::reduce( v1.getSize(),
		                                                      accumulate,
		                                                      accumulateVolatile,
		                                                      loadSquaredDifference,
		                                                      ( ResultType ) 0 ) );
		}

		/**
		 * Computes the L^p norm of v1 - v2: ( sum_i | v1[ i ] - v2[ i ] |^p )^( 1 / p ).
		 *
		 * Asserts that v1 is non-empty, that both vectors have the same size and
		 * that p >= 1.0. The cases p == 1.0 and p == 2.0 are forwarded to
		 * getVectorDifferenceL1Norm and getVectorDifferenceL2Norm respectively.
		 */
		template< typename Device >
		template< typename Vector1, typename Vector2, typename ResultType, typename Scalar >
		ResultType
		CommonVectorOperations< Device >::
		getVectorDifferenceLpNorm( const Vector1& v1,
		                           const Vector2& v2,
		                           const Scalar p )
		{
		   TNL_ASSERT_GT( v1.getSize(), 0, "Vector size must be positive." );
		   TNL_ASSERT_EQ( v1.getSize(), v2.getSize(), "The vector sizes must be the same." );
		   TNL_ASSERT_GE( p, 1.0, "Parameter of the L^p norm must be at least 1.0." );

		   // Dedicated implementations exist for the two most common exponents.
		   if( p == 1.0 )
		   {
		      return getVectorDifferenceL1Norm< Vector1, Vector2, ResultType >( v1, v2 );
		   }
		   if( p == 2.0 )
		   {
		      return getVectorDifferenceL2Norm< Vector1, Vector2, ResultType >( v1, v2 );
		   }

		   using RealType = typename Vector1::RealType;   // not referenced below
		   using IndexType = typename Vector1::IndexType;

		   const auto* lhs = v1.getData();
		   const auto* rhs = v2.getData();

		   // |difference|^p for one element pair; p is captured by value.
		   auto loadPower = [=] __cuda_callable__ ( IndexType idx ) { return TNL::pow( TNL::abs( lhs[ idx ] - rhs[ idx ] ), p ); };

		   // Add the incoming value to the accumulator in place.
		   auto accumulate = [=] __cuda_callable__ ( ResultType& acc, const ResultType& x ) { acc += x; };
		   auto accumulateVolatile = [=] __cuda_callable__ ( volatile ResultType& acc, volatile ResultType& x ) { acc += x; };

		   return std::pow( Reduction< DeviceType >::reduce( v1.getSize(),
		                                                     accumulate,
		                                                     accumulateVolatile,
		                                                     loadPower,
		                                                     ( ResultType ) 0 ),
		                    1.0 / p );
		}

		/**
		 * Computes the sum of the element-wise differences v1[ i ] - v2[ i ].
		 *
		 * Asserts that v1 is non-empty and that both vectors have the same size.
		 * The sum starts from ( ResultType ) 0.
		 */
		template< typename Device >
		template< typename Vector1, typename Vector2, typename ResultType >
		ResultType
		CommonVectorOperations< Device >::
		getVectorDifferenceSum( const Vector1& v1,
		                        const Vector2& v2 )
		{
		   using RealType = typename Vector1::RealType;   // not referenced below
		   using IndexType = typename Vector1::IndexType;

		   TNL_ASSERT_GT( v1.getSize(), 0, "Vector size must be positive." );
		   TNL_ASSERT_EQ( v1.getSize(), v2.getSize(), "The vector sizes must be the same." );

		   const auto* lhs = v1.getData();
		   const auto* rhs = v2.getData();

		   // Difference of one element pair; the return type is deduced.
		   auto loadDifference = [=] __cuda_callable__ ( IndexType idx ) { return lhs[ idx ] - rhs[ idx ]; };

		   // Add the incoming value to the accumulator in place.
		   auto accumulate = [=] __cuda_callable__ ( ResultType& acc, const ResultType& x ) { acc += x; };
		   auto accumulateVolatile = [=] __cuda_callable__ ( volatile ResultType& acc, volatile ResultType& x ) { acc += x; };

		   return Reduction< DeviceType >::reduce( v1.getSize(),
		                                           accumulate,
		                                           accumulateVolatile,
		                                           loadDifference,
		                                           ( ResultType ) 0 );
		}

		/**
		 * Computes the scalar (dot) product of v1 and v2: the sum of v1[ i ] * v2[ i ].
		 *
		 * Asserts that v1 is non-empty and that both vectors have the same size.
		 * Each product is computed on the element types before it enters the
		 * ResultType accumulation, which starts from ( ResultType ) 0.
		 */
		template< typename Device >
		template< typename Vector1, typename Vector2, typename ResultType >
		ResultType
		CommonVectorOperations< Device >::
		getScalarProduct( const Vector1& v1,
		                  const Vector2& v2 )
		{
		   using RealType = typename Vector1::RealType;   // not referenced below
		   using IndexType = typename Vector1::IndexType;

		   TNL_ASSERT_GT( v1.getSize(), 0, "Vector size must be positive." );
		   TNL_ASSERT_EQ( v1.getSize(), v2.getSize(), "The vector sizes must be the same." );

		   const auto* lhs = v1.getData();
		   const auto* rhs = v2.getData();

		   // Product of one element pair; the return type is deduced.
		   auto loadProduct = [=] __cuda_callable__ ( IndexType idx ) { return lhs[ idx ] * rhs[ idx ]; };

		   // Add the incoming value to the accumulator in place.
		   auto accumulate = [=] __cuda_callable__ ( ResultType& acc, const ResultType& x ) { acc += x; };
		   auto accumulateVolatile = [=] __cuda_callable__ ( volatile ResultType& acc, volatile ResultType& x ) { acc += x; };

		   return Reduction< DeviceType >::reduce( v1.getSize(),
		                                           accumulate,
		                                           accumulateVolatile,
		                                           loadProduct,
		                                           ( ResultType ) 0 );
		}

		} // namespace Algorithms
		} // namespace Containers
		} // namespace TNL

src/TNL/Containers/Algorithms/CudaReductionKernel.h

+38 −34

File changed.

Preview size limit exceeded, changes collapsed.

src/TNL/Containers/Algorithms/Reduction.h

+7 −1

Original line number	Diff line number	Diff line
		@@ -32,10 +32,12 @@ public:
		/**
		 * Static reduction entry point; returns a value of type Result.
		 *
		 * All three functors are taken by non-const lvalue reference, so callers
		 * have to pass named objects (e.g. lambdas stored in variables), not
		 * temporaries.
		 *
		 * NOTE(review): the parameter roles below are inferred from the call sites
		 * in CommonVectorOperations.hpp; the implementation is not visible in this
		 * header - confirm against Reduction.hpp.
		 *
		 * \param size              number of indices passed on to the reduction
		 * \param reduction         invoked as ( Result& a, const Result& b ); the callers
		 *                          update a in place
		 * \param volatileReduction same operation on volatile references
		 * \param dataFetcher       invoked with an index, returns the value for that index
		 * \param zero              initial value supplied by the caller
		 */
		template< typename Index,
		typename Result,
		typename ReductionOperation,
		typename VolatileReductionOperation,
		typename DataFetcher >
		static Result
		reduce( const Index size,
		ReductionOperation& reduction,
		VolatileReductionOperation& volatileReduction,
		DataFetcher& dataFetcher,
		const Result& zero );
		};
		@@ -47,10 +49,12 @@ public:
		/**
		 * Static reduction entry point; returns a value of type Result.
		 *
		 * All three functors are taken by non-const lvalue reference, so callers
		 * have to pass named objects (e.g. lambdas stored in variables), not
		 * temporaries.
		 *
		 * NOTE(review): the parameter roles below are inferred from the call sites
		 * in CommonVectorOperations.hpp; the implementation is not visible in this
		 * header - confirm against Reduction.hpp.
		 *
		 * \param size              number of indices passed on to the reduction
		 * \param reduction         invoked as ( Result& a, const Result& b ); the callers
		 *                          update a in place
		 * \param volatileReduction same operation on volatile references
		 * \param dataFetcher       invoked with an index, returns the value for that index
		 * \param zero              initial value supplied by the caller
		 */
		template< typename Index,
		typename Result,
		typename ReductionOperation,
		typename VolatileReductionOperation,
		typename DataFetcher >
		static Result
		reduce( const Index size,
		ReductionOperation& reduction,
		VolatileReductionOperation& volatileReduction,
		DataFetcher& dataFetcher,
		const Result& zero );
		};
		@@ -62,10 +66,12 @@ public:
		/**
		 * Static reduction entry point; returns a value of type Result.
		 *
		 * All three functors are taken by non-const lvalue reference, so callers
		 * have to pass named objects (e.g. lambdas stored in variables), not
		 * temporaries.
		 *
		 * NOTE(review): the parameter roles below are inferred from the call sites
		 * in CommonVectorOperations.hpp; the implementation is not visible in this
		 * header - confirm against Reduction.hpp.
		 *
		 * \param size              number of indices passed on to the reduction
		 * \param reduction         invoked as ( Result& a, const Result& b ); the callers
		 *                          update a in place
		 * \param volatileReduction same operation on volatile references
		 * \param dataFetcher       invoked with an index, returns the value for that index
		 * \param zero              initial value supplied by the caller
		 */
		template< typename Index,
		typename Result,
		typename ReductionOperation,
		typename VolatileReductionOperation,
		typename DataFetcher >
		static Result
		reduce( const Index size,
		ReductionOperation& reduction,
		VolatileReductionOperation& volatileReduction,
		DataFetcher& dataFetcher,
		const Result& zero );
		};
		@@ -74,4 +80,4 @@ public:
		} // namespace Containers
		} // namespace TNL

		#include "Reduction_impl.h"
		#include "Reduction.hpp"

src/TNL/Containers/Algorithms/Reduction_impl.h→src/TNL/Containers/Algorithms/Reduction.hpp

+33 −45

File changed and moved.

Preview size limit exceeded, changes collapsed.