Commit cadfb88a authored by Tomáš Oberhuber's avatar Tomáš Oberhuber Committed by Tomáš Oberhuber
Browse files

Added new CSR adaptive kernel block descriptor.

parent 856bac74
Loading
Loading
Loading
Loading
+10 −9
Original line number Diff line number Diff line
@@ -113,11 +113,12 @@ segmentsReductionCSRAdaptiveKernel( BlocksView blocks,
   else // blockType == Type::LONG - several warps per segment
   {
      // Number of elements processed by previous warps
      const Index offset = block.index[1] * MAX_ELEM_PER_WARP;
      Index to = begin + (block.index[1]  + 1) * MAX_ELEM_PER_WARP;
      const Index segmentIdx = block.index[0];
      const Index offset = //block.index[1] * MAX_ELEM_PER_WARP;
         block.getWarpIdx() * MAX_ELEM_PER_WARP;
      Index to = begin + (block.getWarpIdx()  + 1) * MAX_ELEM_PER_WARP;
      const Index segmentIdx = block.getFirstSegment();//block.index[0];
      //minID = offsets[block.index[0] ];
      const Index end = offsets[block.index[0] + 1];
      const Index end = offsets[segmentIdx + 1];
      const int tid = threadIdx.x;

      if( to > end )
@@ -215,7 +216,7 @@ struct CSRKernelAdaptiveView
         return;
      }

      //this->printBlocks();
      this->printBlocks();
      static constexpr Index THREADS_ADAPTIVE = sizeof(Index) == 8 ? 128 : 256;
      //static constexpr Index THREADS_SCALAR = 128;
      //static constexpr Index THREADS_VECTOR = 128;
@@ -390,15 +391,15 @@ struct CSRKernelAdaptive

            if( type == details::Type::LONG )
            {
               inBlock.emplace_back( start, details::Type::LONG, 0 );
               const Index blocksCount = inBlock.size();
               const Index warpsPerCudaBlock = THREADS_ADAPTIVE / TNL::Cuda::getWarpSize();
               const Index warpsLeft = roundUpDivision( blocksCount, warpsPerCudaBlock ) * warpsPerCudaBlock - blocksCount;
               //Index parts = roundUpDivision(sum, this->SHARED_PER_WARP);
               /*for( Index index = 1; index < warpsLeft; index++ )
               inBlock.emplace_back( start, details::Type::LONG, 0, warpsLeft );
               for( Index index = 1; index < warpsLeft; index++ )
               {
                  inBlock.emplace_back(start, Type::LONG, index);
               }*/
                  inBlock.emplace_back( start, details::Type::LONG, index, warpsLeft );
               }
            }
            else
            {
+51 −21
Original line number Diff line number Diff line
@@ -108,18 +108,35 @@ union CSRAdaptiveKernelBlockDescriptor
#else

template< typename Index >
union CSRAdaptiveKernelBlockDescriptor
{
   CSRAdaptiveKernelBlockDescriptor(Index row, Type type = Type::VECTOR, Index index = 0) noexcept
   {
      this->index[0] = row;
struct CSRAdaptiveKernelBlockDescriptor
{
   CSRAdaptiveKernelBlockDescriptor( Index firstSegmentIdx,
                                     Type type = Type::VECTOR,
                                     uint8_t warpIdx = 0,
                                     uint8_t warpsCount = 0 ) noexcept
   {
      this->firstSegmentIdx = firstSegmentIdx;
      this->type = ( uint8_t ) type;
      this->warpIdx = warpIdx;
      this->warpsCount = warpsCount;
      /*this->index[0] = row;
      this->index[1] = index;
      this->byte[sizeof(Index) == 4 ? 7 : 15] = (uint8_t)type;
      this->byte[sizeof(Index) == 4 ? 7 : 15] = (uint8_t)type;*/
   }

   CSRAdaptiveKernelBlockDescriptor(Index row, Type type, Index nextRow, Index maxID, Index minID) noexcept
   CSRAdaptiveKernelBlockDescriptor( Index firstSegmentIdx,
                                     Type type,
                                     Index lastSegmentIdx,
                                     Index end,
                                     Index begin ) noexcept
   {
      this->index[0] = row;
      this->firstSegmentIdx = firstSegmentIdx;
      this->warpIdx = 0;
      this->blockSize = end - begin;
      this->segmentsInBlock = lastSegmentIdx - firstSegmentIdx;
      this->type = ( uint8_t ) type;

      /*this->index[0] = row;
      this->index[1] = 0;
      this->twobytes[sizeof(Index) == 4 ? 2 : 4] = maxID - minID;

@@ -129,23 +146,25 @@ union CSRAdaptiveKernelBlockDescriptor
      if (type == Type::STREAM)
         this->byte[sizeof(Index) == 4 ? 7 : 15] |= 0b1000000;
      else if (type == Type::VECTOR)
         this->byte[sizeof(Index) == 4 ? 7 : 15] |= 0b10000000;
         this->byte[sizeof(Index) == 4 ? 7 : 15] |= 0b10000000;*/
   }

   CSRAdaptiveKernelBlockDescriptor() = default;

   __cuda_callable__ Type getType() const
   {
      if( byte[ sizeof( Index ) == 4 ? 7 : 15 ] & 0b1000000 )
      return ( Type ) this->type;
      /*if( byte[ sizeof( Index ) == 4 ? 7 : 15 ] & 0b1000000 )
         return Type::STREAM;
      if( byte[ sizeof( Index ) == 4 ? 7 : 15 ] & 0b10000000 )
         return Type::VECTOR;
      return Type::LONG;
      return Type::LONG;*/
   }

   __cuda_callable__ const Index& getFirstSegment() const
   {
      return index[ 0 ];
      return this->firstSegmentIdx;
      //return index[ 0 ];
   }

   /***
@@ -153,7 +172,8 @@ union CSRAdaptiveKernelBlockDescriptor
    */
   __cuda_callable__ const Index getSize() const
   {
      return twobytes[ sizeof(Index) == 4 ? 2 : 4 ];
      return this->blockSize;
      //return twobytes[ sizeof(Index) == 4 ? 2 : 4 ];
   }

   /***
@@ -161,14 +181,19 @@ union CSRAdaptiveKernelBlockDescriptor
    */
   __cuda_callable__ const Index getSegmentsInBlock() const
   {
      return ( twobytes[ sizeof( Index ) == 4 ? 3 : 5 ] & 0x3FFF );
      return this->segmentsInBlock;
      //return ( twobytes[ sizeof( Index ) == 4 ? 3 : 5 ] & 0x3FFF );
   }

   __cuda_callable__ const uint8_t getWarpIdx() const
   {
      return this->warpIdx;
   }

   void print( std::ostream& str ) const
   {
      Type type = this->getType();
      str << "Type: ";
      switch( type )
      switch( this->getType() )
      {
         case Type::STREAM:
            str << " Stream ";
@@ -180,13 +205,18 @@ union CSRAdaptiveKernelBlockDescriptor
            str << " Long ";
            break;
      }
      str << " first segment: " << getFirstSegment();
      str << " block end: " << getSize();
      str << " index in warp: " << index[ 1 ];
      str << " first segment: " << this->getFirstSegment();
      str << " block end: " << this->getSize();
      str << " index in warp: " << this->getWarpIdx();
   }
   Index index[2]; // index[0] is row pointer, index[1] is index in warp
   uint8_t byte[sizeof(Index) == 4 ? 8 : 16]; // byte[7/15] is type specificator
   uint16_t twobytes[sizeof(Index) == 4 ? 4 : 8]; //twobytes[2/4] is maxID - minID

   uint8_t type;
   Index firstSegmentIdx, blockSize, segmentsInBlock;
   uint8_t warpIdx, warpsCount;

   //Index index[2]; // index[0] is row pointer, index[1] is index in warp
   //uint8_t byte[sizeof(Index) == 4 ? 8 : 16]; // byte[7/15] is type specificator
   //uint16_t twobytes[sizeof(Index) == 4 ? 4 : 8]; //twobytes[2/4] is maxID - minID
                                                //twobytes[3/5] is nextRow - row
};