Merge branch 'JK/expression-templates' into 'develop' (5465c226) · Commits · TNL / tnl-dev

src/TNL/Algorithms/Segments/BiEllpackView.hpp

+1 −1

Original line number	Diff line number	Diff line
		@@ -561,7 +561,7 @@ segmentsReductionKernel( IndexType gridIdx,
		const int warpIdx = threadIdx.x / WarpSize;
		const int warpsCount = BlockDim / WarpSize;
		constexpr int groupsInStrip = 6; //getLogWarpSize() + 1;
		IndexType firstGroupIdx = strip * groupsInStrip;
		//IndexType firstGroupIdx = strip * groupsInStrip;
		IndexType firstGroupInBlock = 8 * ( strip / 8 ) * groupsInStrip;
		IndexType groupHeight = getWarpSize();

src/TNL/Atomic.h

+1 −0

Original line number	Diff line number	Diff line
		@@ -63,6 +63,7 @@ public:
		// this copy-constructor and copy-assignment operator are not atomic as they
		// synchronize only with respect to one or the other object.
		Atomic( const Atomic& desired ) noexcept
		// The base is default-constructed explicitly; the actual value is set in
		// the body. NOTE(review): the explicit initializer presumably exists to
		// silence a "base class should be explicitly initialized in the copy
		// constructor" compiler warning -- confirm.
		: std::atomic< T >()
		{
		// Read the source with load() and write the result into *this with
		// store(). These are two separate operations, so the copy as a whole
		// is not a single atomic step.
		this->store(desired.load());
		}

src/TNL/Containers/DistributedArray.h

+1 −1

Original line number	Diff line number	Diff line
		@@ -50,7 +50,7 @@ public:

		DistributedArray() = default;

		DistributedArray( DistributedArray& ) = default;
		DistributedArray( const DistributedArray& ) = default;

		DistributedArray( LocalRangeType localRange, Index globalSize, CommunicationGroup group = Communicator::AllGroup );

src/TNL/Containers/DistributedVector.h

+20 −0

Original line number	Diff line number	Diff line
		@@ -51,6 +51,26 @@ public:
		using BaseType::DistributedArray;
		using BaseType::operator=;

		/**
		* \brief Copy constructor (makes a deep copy).
		*
		* The constructor is declared \e explicit, so it is not considered for
		* copy-initialization from an lvalue (e.g. `DistributedVector b = a;` or
		* passing an lvalue by value). A copy has to be requested with
		* direct-initialization, e.g. `DistributedVector b( a );`.
		*/
		explicit DistributedVector( const DistributedVector& ) = default;

		/**
		* \brief Default move constructor.
		*/
		DistributedVector( DistributedVector&& ) = default;

		/**
		* \brief Copy-assignment operator for copying data from another vector.
		*/
		DistributedVector& operator=( const DistributedVector& ) = default;

		/**
		* \brief Move-assignment operator for acquiring data from \e rvalues.
		*/
		DistributedVector& operator=( DistributedVector&& ) = default;

		// we return only the view so that the user cannot resize it
		LocalViewType getLocalView();

src/TNL/Containers/Expressions/VerticalOperations.h

+12 −2

Original line number	Diff line number	Diff line
		@@ -35,7 +35,12 @@ auto ExpressionMin( const Expression& expression )

		const auto view = expression.getConstView();
		auto fetch = [=] __cuda_callable__ ( IndexType i ) { return view[ i ]; };
		auto reduction = [] __cuda_callable__ ( const ResultType& a, const ResultType& b ) { return TNL::min( a, b ); };
		auto reduction = [] __cuda_callable__ ( const ResultType& a, const ResultType& b )
		{
		// use argument-dependent lookup and make TNL::min available for unqualified calls
		using TNL::min;
		return min( a, b );
		};
		static_assert( std::numeric_limits< ResultType >::is_specialized,
		"std::numeric_limits is not specialized for the reduction's result type" );
		return Algorithms::Reduction< typename Expression::DeviceType >::reduce( ( IndexType ) 0, expression.getSize(), reduction, fetch, std::numeric_limits< ResultType >::max() );
		@@ -72,7 +77,12 @@ auto ExpressionMax( const Expression& expression )

		const auto view = expression.getConstView();
		auto fetch = [=] __cuda_callable__ ( IndexType i ) { return view[ i ]; };
		auto reduction = [] __cuda_callable__ ( const ResultType& a, const ResultType& b ) { return TNL::max( a, b ); };
		auto reduction = [] __cuda_callable__ ( const ResultType& a, const ResultType& b )
		{
		// use argument-dependent lookup and make TNL::max available for unqualified calls
		using TNL::max;
		return max( a, b );
		};
		static_assert( std::numeric_limits< ResultType >::is_specialized,
		"std::numeric_limits is not specialized for the reduction's result type" );
		return Algorithms::Reduction< typename Expression::DeviceType >::reduce( ( IndexType ) 0, expression.getSize(), reduction, fetch, std::numeric_limits< ResultType >::lowest() );