Loading src/TNL/Algorithms/Segments/BiEllpackView.hpp +1 −1 Original line number Diff line number Diff line Loading @@ -561,7 +561,7 @@ segmentsReductionKernel( IndexType gridIdx, const int warpIdx = threadIdx.x / WarpSize; const int warpsCount = BlockDim / WarpSize; constexpr int groupsInStrip = 6; //getLogWarpSize() + 1; IndexType firstGroupIdx = strip * groupsInStrip; //IndexType firstGroupIdx = strip * groupsInStrip; IndexType firstGroupInBlock = 8 * ( strip / 8 ) * groupsInStrip; IndexType groupHeight = getWarpSize(); Loading src/TNL/Atomic.h +1 −0 Original line number Diff line number Diff line Loading @@ -63,6 +63,7 @@ public: // this copy-constructor and copy-assignment operator are not atomic as they // synchronize only with respect to one or the other object. Atomic( const Atomic& desired ) noexcept : std::atomic< T >() { this->store(desired.load()); } Loading src/TNL/Containers/DistributedArray.h +1 −1 Original line number Diff line number Diff line Loading @@ -50,7 +50,7 @@ public: DistributedArray() = default; DistributedArray( DistributedArray& ) = default; DistributedArray( const DistributedArray& ) = default; DistributedArray( LocalRangeType localRange, Index globalSize, CommunicationGroup group = Communicator::AllGroup ); Loading src/TNL/Containers/DistributedVector.h +20 −0 Original line number Diff line number Diff line Loading @@ -51,6 +51,26 @@ public: using BaseType::DistributedArray; using BaseType::operator=; /** * \brief Copy constructor (makes a deep copy). */ explicit DistributedVector( const DistributedVector& ) = default; /** * \brief Default move constructor. */ DistributedVector( DistributedVector&& ) = default; /** * \brief Copy-assignment operator for copying data from another vector. */ DistributedVector& operator=( const DistributedVector& ) = default; /** * \brief Move-assignment operator for acquiring data from \e rvalues. */ DistributedVector& operator=( DistributedVector&& ) = default; // we return only the view so that the user cannot resize it LocalViewType getLocalView(); Loading src/TNL/Containers/Expressions/VerticalOperations.h +12 −2 Original line number Diff line number Diff line Loading @@ -35,7 +35,12 @@ auto ExpressionMin( const Expression& expression ) const auto view = expression.getConstView(); auto fetch = [=] __cuda_callable__ ( IndexType i ) { return view[ i ]; }; auto reduction = [] __cuda_callable__ ( const ResultType& a, const ResultType& b ) { return TNL::min( a, b ); }; auto reduction = [] __cuda_callable__ ( const ResultType& a, const ResultType& b ) { // use argument-dependent lookup and make TNL::min available for unqualified calls using TNL::min; return min( a, b ); }; static_assert( std::numeric_limits< ResultType >::is_specialized, "std::numeric_limits is not specialized for the reduction's result type" ); return Algorithms::Reduction< typename Expression::DeviceType >::reduce( ( IndexType ) 0, expression.getSize(), reduction, fetch, std::numeric_limits< ResultType >::max() ); Loading Loading @@ -72,7 +77,12 @@ auto ExpressionMax( const Expression& expression ) const auto view = expression.getConstView(); auto fetch = [=] __cuda_callable__ ( IndexType i ) { return view[ i ]; }; auto reduction = [] __cuda_callable__ ( const ResultType& a, const ResultType& b ) { return TNL::max( a, b ); }; auto reduction = [] __cuda_callable__ ( const ResultType& a, const ResultType& b ) { // use argument-dependent lookup and make TNL::max available for unqualified calls using TNL::max; return max( a, b ); }; static_assert( std::numeric_limits< ResultType >::is_specialized, "std::numeric_limits is not specialized for the reduction's result type" ); return Algorithms::Reduction< typename Expression::DeviceType >::reduce( ( IndexType ) 0, expression.getSize(), reduction, fetch, std::numeric_limits< ResultType >::lowest() ); Loading Loading
src/TNL/Algorithms/Segments/BiEllpackView.hpp +1 −1 Original line number Diff line number Diff line Loading @@ -561,7 +561,7 @@ segmentsReductionKernel( IndexType gridIdx, const int warpIdx = threadIdx.x / WarpSize; const int warpsCount = BlockDim / WarpSize; constexpr int groupsInStrip = 6; //getLogWarpSize() + 1; IndexType firstGroupIdx = strip * groupsInStrip; //IndexType firstGroupIdx = strip * groupsInStrip; IndexType firstGroupInBlock = 8 * ( strip / 8 ) * groupsInStrip; IndexType groupHeight = getWarpSize(); Loading
src/TNL/Atomic.h +1 −0 Original line number Diff line number Diff line Loading @@ -63,6 +63,7 @@ public: // this copy-constructor and copy-assignment operator are not atomic as they // synchronize only with respect to one or the other object. Atomic( const Atomic& desired ) noexcept : std::atomic< T >() { this->store(desired.load()); } Loading
src/TNL/Containers/DistributedArray.h +1 −1 Original line number Diff line number Diff line Loading @@ -50,7 +50,7 @@ public: DistributedArray() = default; DistributedArray( DistributedArray& ) = default; DistributedArray( const DistributedArray& ) = default; DistributedArray( LocalRangeType localRange, Index globalSize, CommunicationGroup group = Communicator::AllGroup ); Loading
src/TNL/Containers/DistributedVector.h +20 −0 Original line number Diff line number Diff line Loading @@ -51,6 +51,26 @@ public: using BaseType::DistributedArray; using BaseType::operator=; /** * \brief Copy constructor (makes a deep copy). */ explicit DistributedVector( const DistributedVector& ) = default; /** * \brief Default move constructor. */ DistributedVector( DistributedVector&& ) = default; /** * \brief Copy-assignment operator for copying data from another vector. */ DistributedVector& operator=( const DistributedVector& ) = default; /** * \brief Move-assignment operator for acquiring data from \e rvalues. */ DistributedVector& operator=( DistributedVector&& ) = default; // we return only the view so that the user cannot resize it LocalViewType getLocalView(); Loading
src/TNL/Containers/Expressions/VerticalOperations.h +12 −2 Original line number Diff line number Diff line Loading @@ -35,7 +35,12 @@ auto ExpressionMin( const Expression& expression ) const auto view = expression.getConstView(); auto fetch = [=] __cuda_callable__ ( IndexType i ) { return view[ i ]; }; auto reduction = [] __cuda_callable__ ( const ResultType& a, const ResultType& b ) { return TNL::min( a, b ); }; auto reduction = [] __cuda_callable__ ( const ResultType& a, const ResultType& b ) { // use argument-dependent lookup and make TNL::min available for unqualified calls using TNL::min; return min( a, b ); }; static_assert( std::numeric_limits< ResultType >::is_specialized, "std::numeric_limits is not specialized for the reduction's result type" ); return Algorithms::Reduction< typename Expression::DeviceType >::reduce( ( IndexType ) 0, expression.getSize(), reduction, fetch, std::numeric_limits< ResultType >::max() ); Loading Loading @@ -72,7 +77,12 @@ auto ExpressionMax( const Expression& expression ) const auto view = expression.getConstView(); auto fetch = [=] __cuda_callable__ ( IndexType i ) { return view[ i ]; }; auto reduction = [] __cuda_callable__ ( const ResultType& a, const ResultType& b ) { return TNL::max( a, b ); }; auto reduction = [] __cuda_callable__ ( const ResultType& a, const ResultType& b ) { // use argument-dependent lookup and make TNL::max available for unqualified calls using TNL::max; return max( a, b ); }; static_assert( std::numeric_limits< ResultType >::is_specialized, "std::numeric_limits is not specialized for the reduction's result type" ); return Algorithms::Reduction< typename Expression::DeviceType >::reduce( ( IndexType ) 0, expression.getSize(), reduction, fetch, std::numeric_limits< ResultType >::lowest() ); Loading