Commit bf4dc990 authored by Tomáš Oberhuber
Browse files

Merge branch 'TO/matrices' into 'develop'

To/matrices

See merge request !85
parents d2524e94 92a039dc
Loading
Loading
Loading
Loading
+10 −6
Original line number Diff line number Diff line
@@ -20,6 +20,7 @@ cpu_matrix_formats = [ 'CSR',
gpu_matrix_formats = [ 'CSR Legacy Scalar', 'CSR Legacy Vector', 'CSR Legacy MultiVector',
                       'CSR Legacy Light', 'CSR Legacy Light2', 'CSR Legacy Light3', 'CSR Legacy Light4', 'CSR Legacy Light5', 'CSR Legacy Light6', 'CSR Legacy LightWithoutAtomic',
                       'CSR Legacy Adaptive',
                       'CSR< Scalar >', 'CSR< Vector >', 'CSR< Hybrid >', 'CSR< Adaptive >',
                       'Ellpack', 'Ellpack Legacy',
                       'SlicedEllpack', 'SlicedEllpack Legacy',
                       'ChunkedEllpack', 'ChunkedEllpack Legacy',
@@ -36,7 +37,10 @@ cpu_comparison_formats = { 'CSR' : 'CSR Legacy Scalar',
"""
GPU formats to be compared
"""
gpu_comparison_formats = { #'CSR' : 'CSR Legacy Scalar',
gpu_comparison_formats = { 'CSR< Scalar >' : 'CSR Legacy Scalar',
                           'CSR< Vector >' : 'CSR Legacy Vector',
                           'CSR< Hybrid >' : 'CSR Legacy LightWithoutAtomic',
                           'CSR< Adaptive >' : 'CSR Legacy Adaptive',
                           'Ellpack' : 'Ellpack Legacy',
                           'SlicedEllpack' : 'SlicedEllpack Legacy',
                           'BiEllpack' : 'BiEllpack Legacy'
+21 −16
Original line number Diff line number Diff line
@@ -220,6 +220,11 @@ struct CSRKernelAdaptiveView

   /**
    * Returns this object converted to ConstViewType.
    *
    * The conversion is performed implicitly on return from *this.
    */
   ConstViewType getConstView() const
   {
      return *this;
   }

   /**
    * Name of this kernel type as a string.
    *
    * \return TNL::String holding "Adaptive".
    */
   static TNL::String getKernelType()
   {
      return TNL::String( "Adaptive" );
   }

   template< typename OffsetsView,
             typename Fetch,
             typename Reduction,
@@ -344,6 +349,10 @@ struct CSRKernelAdaptive
    using BlocksType = typename ViewType::BlocksType;
    using BlocksView = typename BlocksType::ViewType;

   /**
    * Name of this kernel type as a string.
    *
    * \return Whatever ViewType::getKernelType() returns; the call is
    *         forwarded so that the kernel and its view report the same name.
    */
   static TNL::String getKernelType()
   {
      TNL::String kernelType = ViewType::getKernelType();
      return kernelType;
   }

    // 128 when Index is 8 bytes wide, 256 otherwise.
    // NOTE(review): presumably the number of threads per CUDA block for the
    // adaptive kernel - confirm against the kernel launch code.
    static constexpr Index THREADS_ADAPTIVE = sizeof(Index) == 8 ? 128 : 256;

@@ -373,10 +382,11 @@ struct CSRKernelAdaptive
                    Index &sum )
   {
      sum = 0;
      TNL::Containers::Vector< typename Offsets::IndexType, TNL::Devices::Host, typename Offsets::IndexType >
         hostOffsets( offsets );
      for (Index current = start; current < size - 1; current++ )
      {
         Index elements = offsets.getElement(current + 1) -
                           offsets.getElement(current);
         Index elements = hostOffsets[ current + 1 ] - hostOffsets[ current ];
         sum += elements;
         if( sum > SHARED_PER_WARP )
         {
@@ -407,8 +417,8 @@ struct CSRKernelAdaptive
        Index sum, start( 0 ), nextStart( 0 );

        // Fill blocks
        std::vector< details::CSRAdaptiveKernelBlockDescriptor< Index > > inBlock;
        inBlock.reserve( rows );
        std::vector< details::CSRAdaptiveKernelBlockDescriptor< Index > > inBlocks;
        inBlocks.reserve( rows );

        while( nextStart != rows - 1 )
        {
@@ -417,34 +427,29 @@ struct CSRKernelAdaptive

            if( type == details::Type::LONG )
            {
               const Index blocksCount = inBlock.size();
               const Index blocksCount = inBlocks.size();
               const Index warpsPerCudaBlock = THREADS_ADAPTIVE / TNL::Cuda::getWarpSize();
               Index warpsLeft = roundUpDivision( blocksCount, warpsPerCudaBlock ) * warpsPerCudaBlock - blocksCount;
               if( warpsLeft == 0 )
                  warpsLeft = warpsPerCudaBlock;
               //Index parts = roundUpDivision(sum, this->SHARED_PER_WARP);
               inBlock.emplace_back( start, details::Type::LONG, 0, warpsLeft );
               inBlocks.emplace_back( start, details::Type::LONG, 0, warpsLeft );
               for( Index index = 1; index < warpsLeft; index++ )
               {
                  inBlock.emplace_back( start, details::Type::LONG, index, warpsLeft );
                  inBlocks.emplace_back( start, details::Type::LONG, index, warpsLeft );
               }
            }
            else
            {
               inBlock.emplace_back(start, type,
               inBlocks.emplace_back(start, type,
                    nextStart,
                    offsets.getElement(nextStart),
                    offsets.getElement(start) );
            }
            start = nextStart;
        }
        inBlock.emplace_back(nextStart);

        // Copy values
        this->blocks.setSize(inBlock.size());
        for (size_t i = 0; i < inBlock.size(); ++i)
            this->blocks.setElement(i, inBlock[i]);

        inBlocks.emplace_back(nextStart);
        this->blocks = inBlocks;
        this->view.setBlocks( blocks );
    };

+1 −0
Original line number Diff line number Diff line
@@ -38,6 +38,7 @@ struct CSRKernelHybrid

   ConstViewType getConstView() const;

   /**
    * Name of this kernel type as a string.
    *
    * Static, so it can be called without a kernel instance.
    * The out-of-class definition returns "Hybrid".
    */
   static TNL::String getKernelType();

   template< typename OffsetsView,
             typename Fetch,
+9 −0
Original line number Diff line number Diff line
@@ -114,6 +114,15 @@ getView() -> ViewType
    return *this;
}

template< typename Index,
          typename Device >
TNL::String
CSRKernelHybrid< Index, Device >::
getKernelType()
{
    return "Hybrid";
}

template< typename Index,
          typename Device >
auto
+2 −0
Original line number Diff line number Diff line
@@ -38,6 +38,8 @@ struct CSRKernelScalar

    ConstViewType getConstView() const;

    /**
     * Name of this kernel type as a string.
     *
     * Static, so it can be called without a kernel instance.
     * NOTE(review): the definition is not visible here; presumably it
     * returns "Scalar" - confirm in the implementation file.
     */
    static TNL::String getKernelType();

    template< typename OffsetsView,
              typename Fetch,
              typename Reduction,
Loading