Commit e0af631b authored by Tomáš Oberhuber
Browse files

Implementing SlicedEllpack format.

parent 04f21175
Loading
Loading
Loading
Loading
+23 −4
Original line number Diff line number Diff line
@@ -21,24 +21,40 @@
#include <math.h>
#include <stdlib.h>

template< typename Type1, typename Type2 > Type1 Min( const Type1& a, const Type2& b )
// Returns the smaller of a and b, converted to the type of the first argument.
//
// The operands are compared with operator< on ( a, b ); a is returned when
// a < b holds, otherwise b is returned (so b is the result for equal values).
// Each branch converts only its own operand to Type1. This avoids sending a
// through the common type of Type1 and Type2 and back, which could silently
// lose precision (e.g. a 64-bit integer compared against a double).
//
// When HAVE_CUDA is defined the function is compiled for both host and device.
template< typename Type1, typename Type2 >
#ifdef HAVE_CUDA
__device__ __host__
#endif
Type1 Min( const Type1& a, const Type2& b )
{
   if( a < b )
      return a;
   return b;
}

template< typename Type1, typename Type2 > Type1 Max( const Type1& a, const Type2& b )
// Returns the larger of a and b, converted to the type of the first argument.
//
// The operands are compared with operator> on ( a, b ); a is returned when
// a > b holds, otherwise b is returned (so b is the result for equal values).
// Each branch converts only its own operand to Type1. This avoids sending a
// through the common type of Type1 and Type2 and back, which could silently
// lose precision (e.g. a 64-bit integer compared against a double).
//
// When HAVE_CUDA is defined the function is compiled for both host and device.
template< typename Type1, typename Type2 >
#ifdef HAVE_CUDA
__device__ __host__
#endif
Type1 Max( const Type1& a, const Type2& b )
{
   if( a > b )
      return a;
   return b;
}

template< typename Type > void Swap( Type& a, Type& b )
// Exchanges the values held by a and b.
//
// Type must be copy-constructible and assignable: one copy of a is taken,
// then a receives b and b receives the saved copy.
//
// When HAVE_CUDA is defined the function is compiled for both host and device.
template< typename Type >
#ifdef HAVE_CUDA
__device__ __host__
#endif
void Swap( Type& a, Type& b )
{
   // Save the first value; the assignment below overwrites it.
   Type previousA( a );
   a = b;
   b = previousA;
}

template< class T > T Sign( const T& a )
template< class T >
#ifdef HAVE_CUDA
__device__ __host__
#endif
T Sign( const T& a )
{
   if( a < ( T ) 0 ) return -1;
   if( a == ( T ) 0 ) return 0;
@@ -46,6 +62,9 @@ template< class T > T Sign( const T& a )
};

template< class T >
#ifdef HAVE_CUDA
__device__ __host__
#endif
T tnlAbs( const T& n )
{
   if( n < ( T ) 0 )
+17 −1
Original line number Diff line number Diff line
@@ -618,7 +618,20 @@ __global__ void tnlSlicedEllpackMatrix_computeMaximalRowLengthInSlices_CudaKerne
                                                                                   const typename tnlSlicedEllpackMatrix< Real, tnlCuda, Index, SliceSize >::RowLengthsVector* rowLengths,
                                                                                   int gridIdx )
{
   // Work-in-progress kernel: every thread scans one slice of the matrix and
   // finds the longest row length within it.
   //
   // The slice index is the global thread index; gridIdx offsets it for launches
   // that are split into several grids of tnlCuda::getMaxGridSize() blocks.
   // The result is only printed for now - the stores into the matrix at the end
   // are still commented out.
   const Index sliceIdx = gridIdx * tnlCuda::getMaxGridSize() * blockDim.x + blockIdx.x * blockDim.x + threadIdx.x;
   Index rowIdx = sliceIdx * SliceSize;
   Index rowInSliceIdx( 0 );
   Index maxRowLength( 0 );
   // Visit at most SliceSize rows, and stop early at the end of the matrix
   // (the last slice may be incomplete; threads beyond the last slice do nothing).
   while( rowInSliceIdx < SliceSize && rowIdx < matrix->getRows() )
   {
      // Debug output. NOTE(review): the %d conversions assume Index is int - confirm.
      printf( "sliceIdx = %d rowInSliceIdx = %d SliceSize = %d rowIdx = %d matrix->getRows() = %d \n", sliceIdx, rowInSliceIdx, SliceSize, rowIdx, matrix->getRows() );
      // rowLengths points to a single vector object, so it must be dereferenced
      // before indexing; rowLengths[ rowIdx ] would index the pointer itself.
      // NOTE(review): assumes RowLengthsVector::operator[] is callable from
      // device code - confirm.
      maxRowLength = Max( maxRowLength, ( *rowLengths )[ rowIdx ] );
      printf( "threadIdx.x = %d, maxRowLength = %d \n", threadIdx.x, maxRowLength );
      rowIdx++;
      rowInSliceIdx++;
   }
   //matrix->sliceRowLengths[ sliceIdx ] = maxRowLength;
   //matrix->slicePointers[ sliceIdx ] = maxRowLength * SliceSize;
}
#endif

@@ -679,6 +692,7 @@ class tnlSlicedEllpackMatrixDeviceDependentCode< tnlCuda >
         dim3 cudaBlockSize( 256 ), cudaGridSize( tnlCuda::getMaxGridSize() );
         const Index cudaBlocks = roundUpDivision( numberOfSlices, cudaBlockSize.x );
         const Index cudaGrids = roundUpDivision( cudaBlocks, tnlCuda::getMaxGridSize() );
         cout << rowLengths << endl;
         for( int gridIdx = 0; gridIdx < cudaGrids; gridIdx++ )
         {
            if( gridIdx == cudaGrids - 1 )
@@ -691,6 +705,8 @@ class tnlSlicedEllpackMatrixDeviceDependentCode< tnlCuda >
         tnlCuda::freeFromDevice( kernel_matrix );
         tnlCuda::freeFromDevice( kernel_rowLengths );
         checkCudaDevice;
         cout << rowLengths << endl;
         cout << matrix.slicePointers << endl << matrix.sliceRowLengths << endl;
#endif
      }
};
+21 −0
Original line number Diff line number Diff line
@@ -28,6 +28,21 @@ template< typename Real = double,
          typename Device = tnlHost,
          typename Index = int,
          int SliceSize = 32 >
class tnlSlicedEllpackMatrix;

#ifdef HAVE_CUDA
// Forward declaration of the CUDA kernel so that it can be named before the
// tnlSlicedEllpackMatrix class template is defined.
// It takes pointers to the matrix and to its row-lengths vector, plus the
// index of the grid being launched.
template< typename Real,
          typename Index,
          int SliceSize >
__global__ void tnlSlicedEllpackMatrix_computeMaximalRowLengthInSlices_CudaKernel( tnlSlicedEllpackMatrix< Real, tnlCuda, Index, SliceSize >* matrix,
                                                                                   const typename tnlSlicedEllpackMatrix< Real, tnlCuda, Index, SliceSize >::RowLengthsVector* rowLengths,
                                                                                   int gridIdx );
#endif

template< typename Real,
          typename Device,
          typename Index,
          int SliceSize >
class tnlSlicedEllpackMatrix : public tnlSparseMatrix< Real, Device, Index >
{
   public:
@@ -178,6 +193,12 @@ class tnlSlicedEllpackMatrix : public tnlSparseMatrix< Real, Device, Index >

   typedef tnlSlicedEllpackMatrixDeviceDependentCode< DeviceType > DeviceDependentCode;
   friend class tnlSlicedEllpackMatrixDeviceDependentCode< DeviceType >;
#ifdef HAVE_CUDA
   friend void tnlSlicedEllpackMatrix_computeMaximalRowLengthInSlices_CudaKernel< Real, Index, SliceSize >( 
   tnlSlicedEllpackMatrix< Real, tnlCuda, Index, SliceSize >* matrix,
                                                                                                                       const typename tnlSlicedEllpackMatrix< Real, tnlCuda, Index, SliceSize >::RowLengthsVector* rowLengths,
                                                                                                                       int gridIdx );
#endif

};