Implementing SlicedEllpack format. (e0af631b) · Commits · TNL / tnl-dev

src/core/mfuncs.h

+23 −4

Original line number	Diff line number	Diff line
		@@ -21,24 +21,40 @@
		#include <math.h>
		#include <stdlib.h>

		// Min: returns a when a < b, otherwise b (so b is returned when the two compare equal).
		// The two operands may have different types; the result is returned by value as Type1.
		// NOTE(review): in this diff listing the one-line signature directly below is the
		// removed pre-change form; the multi-line signature that follows is its replacement.
		template< typename Type1, typename Type2 > Type1 Min( const Type1& a, const Type2& b )
		template< typename Type1, typename Type2 >
		// The execution-space qualifiers are applied only when HAVE_CUDA is defined.
		#ifdef HAVE_CUDA
		__device__ __host__
		#endif
		Type1 Min( const Type1& a, const Type2& b )
		{
		// When Type2 differs from Type1, the selected value is converted to Type1 on return.
		return a < b ? a : b;
		};

		// Max: returns a when a > b, otherwise b (so b is returned when the two compare equal).
		// The two operands may have different types; the result is returned by value as Type1.
		// NOTE(review): in this diff listing the one-line signature directly below is the
		// removed pre-change form; the multi-line signature that follows is its replacement.
		template< typename Type1, typename Type2 > Type1 Max( const Type1& a, const Type2& b )
		template< typename Type1, typename Type2 >
		// The execution-space qualifiers are applied only when HAVE_CUDA is defined.
		#ifdef HAVE_CUDA
		__device__ __host__
		#endif
		Type1 Max( const Type1& a, const Type2& b )
		{
		// When Type2 differs from Type1, the selected value is converted to Type1 on return.
		return a > b ? a : b;
		};

		// Swap: exchanges the values of a and b through a temporary copy.
		// Type must be copy-constructible and assignable, since the body uses both operations.
		// NOTE(review): in this diff listing the one-line signature directly below is the
		// removed pre-change form; the multi-line signature that follows is its replacement.
		template< typename Type > void Swap( Type& a, Type& b )
		template< typename Type >
		// The execution-space qualifiers are applied only when HAVE_CUDA is defined.
		#ifdef HAVE_CUDA
		__device__ __host__
		#endif
		void Swap( Type& a, Type& b )
		{
		// Keep a copy of a, overwrite a with b, then store the saved copy into b.
		Type tmp( a );
		a = b;
		b = tmp;
		};

		template< class T > T Sign( const T& a )
		template< class T >
		#ifdef HAVE_CUDA
		__device__ __host__
		#endif
		T Sign( const T& a )
		{
		if( a < ( T ) 0 ) return -1;
		if( a == ( T ) 0 ) return 0;
		@@ -46,6 +62,9 @@ template< class T > T Sign( const T& a )
		};

		template< class T >
		#ifdef HAVE_CUDA
		__device__ __host__
		#endif
		T tnlAbs( const T& n )
		{
		if( n < ( T ) 0 )

src/implementation/matrices/tnlSlicedEllpackMatrix_impl.h

+17 −1

Original line number	Diff line number	Diff line
		@@ -618,7 +618,20 @@ __global__ void tnlSlicedEllpackMatrix_computeMaximalRowLengthInSlices_CudaKerne
		const typename tnlSlicedEllpackMatrix< Real, tnlCuda, Index, SliceSize >::RowLengthsVector* rowLengths,
		int gridIdx )
		{

		const Index sliceIdx = gridIdx * tnlCuda::getMaxGridSize() * blockDim.x + blockIdx.x * blockDim.x + threadIdx.x;
		Index rowIdx = sliceIdx * SliceSize;
		Index rowInSliceIdx( 0 );
		Index maxRowLength( 0 );
		while( rowInSliceIdx < SliceSize && rowIdx < matrix->getRows() )
		{
		printf( "sliceIdx = %d rowInSliceIdx = %d SliceSize = %d rowIdx = %d matrix->getRows() = %d \n", sliceIdx, rowInSliceIdx, SliceSize, rowIdx, matrix->getRows() );
		maxRowLength = Max( maxRowLength, rowLengths[ rowIdx ] );
		printf( "threadIdx.x = %d, maxRowLength = %d \n", threadIdx.x, maxRowLength );
		rowIdx++;
		rowInSliceIdx++;
		}
		//matrix->sliceRowLengths[ sliceIdx ] = maxRowLength;
		//matrix->slicePointers[ sliceIdx ] = maxRowLength * SliceSize;
		}
		#endif

		@@ -679,6 +692,7 @@ class tnlSlicedEllpackMatrixDeviceDependentCode< tnlCuda >
		dim3 cudaBlockSize( 256 ), cudaGridSize( tnlCuda::getMaxGridSize() );
		const Index cudaBlocks = roundUpDivision( numberOfSlices, cudaBlockSize.x );
		const Index cudaGrids = roundUpDivision( cudaBlocks, tnlCuda::getMaxGridSize() );
		cout << rowLengths << endl;
		for( int gridIdx = 0; gridIdx < cudaGrids; gridIdx++ )
		{
		if( gridIdx == cudaGrids - 1 )
		@@ -691,6 +705,8 @@ class tnlSlicedEllpackMatrixDeviceDependentCode< tnlCuda >
		tnlCuda::freeFromDevice( kernel_matrix );
		tnlCuda::freeFromDevice( kernel_rowLengths );
		checkCudaDevice;
		cout << rowLengths << endl;
		cout << matrix.slicePointers << endl << matrix.sliceRowLengths << endl;
		#endif
		}
		};

src/matrices/tnlSlicedEllpackMatrix.h

+21 −0

Original line number	Diff line number	Diff line
		@@ -28,6 +28,21 @@ template< typename Real = double,
		typename Device = tnlHost,
		typename Index = int,
		int SliceSize = 32 >
		class tnlSlicedEllpackMatrix;

		// Forward declaration of the CUDA kernel template; visible only when HAVE_CUDA is defined.
		// The kernel receives a pointer to the tnlCuda specialization of the matrix, a pointer to
		// its RowLengthsVector, and an integer gridIdx.
		// NOTE(review): presumably declared ahead of the class definition so that the friend
		// declaration inside tnlSlicedEllpackMatrix can name this specialization - confirm.
		#ifdef HAVE_CUDA
		template< typename Real,
		typename Index,
		int SliceSize >
		__global__ void tnlSlicedEllpackMatrix_computeMaximalRowLengthInSlices_CudaKernel( tnlSlicedEllpackMatrix< Real, tnlCuda, Index, SliceSize >* matrix,
		const typename tnlSlicedEllpackMatrix< Real, tnlCuda, Index, SliceSize >::RowLengthsVector* rowLengths,
		int gridIdx );
		#endif

		template< typename Real,
		typename Device,
		typename Index,
		int SliceSize >
		class tnlSlicedEllpackMatrix : public tnlSparseMatrix< Real, Device, Index >
		{
		public:
		@@ -178,6 +193,12 @@ class tnlSlicedEllpackMatrix : public tnlSparseMatrix< Real, Device, Index >

		typedef tnlSlicedEllpackMatrixDeviceDependentCode< DeviceType > DeviceDependentCode;
		friend class tnlSlicedEllpackMatrixDeviceDependentCode< DeviceType >;
		#ifdef HAVE_CUDA
		friend void tnlSlicedEllpackMatrix_computeMaximalRowLengthInSlices_CudaKernel< Real, Index, SliceSize >(
		tnlSlicedEllpackMatrix< Real, tnlCuda, Index, SliceSize >* matrix,
		const typename tnlSlicedEllpackMatrix< Real, tnlCuda, Index, SliceSize >::RowLengthsVector* rowLengths,
		int gridIdx );
		#endif

		};