Commit 4196f915 authored by Illia Kolesnik, committed by Tomáš Oberhuber
Browse files

Fixed block sizes for CSR Light, other improvements

parent 382eb38f
Loading
Loading
Loading
Loading
+6 −6
Original line number Diff line number Diff line
@@ -110,16 +110,16 @@ public:
   /* Configuration of CSR SpMV kernels ----------------------------------------- */

   /* Block sizes */
   static constexpr Index THREADS_ADAPTIVE = 1024;
   static constexpr Index THREADS_SCALAR = 1024;
   static constexpr Index THREADS_VECTOR = 1024;
   static constexpr Index THREADS_LIGHT = 1024;
   static constexpr Index THREADS_ADAPTIVE = sizeof(Index) == 8 ? 128 : 256;
   static constexpr Index THREADS_SCALAR = 128;
   static constexpr Index THREADS_VECTOR = 128;
   static constexpr Index THREADS_LIGHT = 128;

   /* Maximum row length (in elements) processed by a single warp */
   static constexpr Index MAX_ELEMENTS_PER_WARP = 1024;
   static constexpr Index MAX_ELEMENTS_PER_WARP = 512;

   /* Amount of shared memory (in bytes) used per block in the CSR Adaptive kernel */
   static constexpr Index SHARED_PER_BLOCK = 49152;
   static constexpr Index SHARED_PER_BLOCK = 24576;

   /* Number of elements in shared memory */
   static constexpr Index SHARED = SHARED_PER_BLOCK/sizeof(Real);
+4 −4
Original line number Diff line number Diff line
@@ -134,7 +134,7 @@ Index findLimit(const Index start,
            type = Type::STREAM;
            return current;
         } else {                  // one long row
            if (sum <= matrix.MAX_ELEMENTS_PER_WARP)
            if (sum <= 2 * matrix.MAX_ELEMENTS_PER_WARP)
               type = Type::VECTOR;
            else
               type = Type::LONG;
@@ -1407,7 +1407,7 @@ template< typename Real,
void SpMVCSRLightPrepare( const Real *inVector,
                          Real* outVector,
                          const CSR< Real, Device, Index, KernelType >& matrix) {
   const Index threads = matrix.THREADS_LIGHT; // max block size
   const Index threads = 1024; // max block size
   const Index rows = matrix.getRowPointers().getSize() - 1;
   /* Copy rowCnt to GPU */
   unsigned rowCnt = 0;
@@ -1554,7 +1554,7 @@ void SpMVCSRLightWithoutAtomicPrepare( const Real *inVector,
      groupSize = 8;
   else if (nnz <= 16)
      groupSize = 16;
   else if (nnz <= matrix.MAX_ELEMENTS_PER_WARP)
   else if (nnz <= 2 * matrix.MAX_ELEMENTS_PER_WARP)
      groupSize = 32; // CSR Vector
   else
      groupSize = roundUpDivision(nnz, matrix.MAX_ELEMENTS_PER_WARP) * 32; // CSR MultiVector