Loading src/implementation/matrices/tnlChunkedEllpackMatrix_impl.h +449 −102 File changed.Preview size limit exceeded, changes collapsed. Show changes src/implementation/matrices/tnlSparseMatrix_impl.h +7 −0 Original line number Diff line number Diff line Loading @@ -111,5 +111,12 @@ bool tnlSparseMatrix< Real, Device, Index >::allocateMatrixElements( const Index return true; } template< typename Real, typename Device, typename Index > void tnlSparseMatrix< Real, Device, Index >::printStructure( ostream& str ) const { } #endif /* TNLSPARSEMATRIX_IMPL_H_ */ src/matrices/tnlChunkedEllpackMatrix.h +79 −12 Original line number Diff line number Diff line Loading @@ -42,6 +42,16 @@ struct tnlChunkedEllpackSliceInfo { return tnlString( "tnlChunkedEllpackSliceInfo" ); }; }; #ifdef HAVE_CUDA template< typename Real, typename Index, typename Vector > __global__ void tnlChunkedEllpackMatrixVectorProductCudaKernel( const tnlChunkedEllpackMatrix< Real, tnlCuda, Index >* matrix, const Vector* inVector, Vector* outVector, int gridIdx ); #endif template< typename Real, typename Device, typename Index > class tnlChunkedEllpackMatrix : public tnlSparseMatrix< Real, Device, Index > { Loading Loading @@ -85,6 +95,11 @@ class tnlChunkedEllpackMatrix : public tnlSparseMatrix< Real, Device, Index > IndexType getDesiredChunkSize() const; #ifdef HAVE_CUDA __device__ __host__ #endif IndexType getNumberOfSlices() const; #ifdef HAVE_CUDA __device__ __host__ #endif Loading Loading @@ -165,6 +180,14 @@ class tnlChunkedEllpackMatrix : public tnlSparseMatrix< Real, Device, Index > typename Vector::RealType rowVectorProduct( const IndexType row, const Vector& vector ) const; #ifdef HAVE_CUDA template< typename Vector > __device__ void computeSliceVectorProduct( const Vector* inVector, Vector* outVector, int gridIdx ) const; #endif template< typename Vector > void vectorProduct( const Vector& inVector, Vector& outVector ) const; Loading Loading @@ -194,11 +217,12 @@ class tnlChunkedEllpackMatrix : public tnlSparseMatrix< Real, Device, Index > void print( ostream& str ) const; void printStructure( ostream& str ) const; protected: void resolveSliceSizes( const tnlVector< Index, tnlHost, Index >& rowLengths, IndexType& numberOfSlices ); void resolveSliceSizes( const tnlVector< Index, tnlHost, Index >& rowLengths ); bool setSlice( const RowLengthsVector& rowLengths, const IndexType sliceIdx, Loading @@ -211,6 +235,9 @@ class tnlChunkedEllpackMatrix : public tnlSparseMatrix< Real, Device, Index > RealType& value, RealType& thisElementMultiplicator ); #ifdef HAVE_CUDA __device__ __host__ #endif bool addElementToChunkFast( const IndexType sliceOffset, const IndexType chunkIndex, const IndexType chunkSize, Loading @@ -218,18 +245,60 @@ class tnlChunkedEllpackMatrix : public tnlSparseMatrix< Real, Device, Index > RealType& value, RealType& thisElementMultiplicator ); #ifdef HAVE_CUDA __device__ __host__ #endif void setChunkFast( const IndexType sliceOffset, const IndexType chunkIndex, const IndexType chunkSize, const IndexType* columnIndexes, const RealType* values, const IndexType elements ); void setChunk( const IndexType sliceOffset, const IndexType chunkIndex, const IndexType chunkSize, const IndexType* columnIndexes, const RealType* values, const IndexType elements ); bool getElementInChunk( const IndexType sliceOffset, const IndexType chunkIndex, const IndexType chunkSize, const IndexType column, RealType& value ) const; #ifdef HAVE_CUDA __device__ __host__ #endif bool getElementInChunkFast( const IndexType sliceOffset, const IndexType chunkIndex, const IndexType chunkSize, const IndexType column, RealType& value ) const; void getChunk( const IndexType sliceOffset, const IndexType chunkIndex, const IndexType chunkSize, IndexType* columns, RealType* values ) const; #ifdef HAVE_CUDA __device__ __host__ #endif void getChunkFast( const IndexType sliceOffset, const IndexType chunkIndex, const IndexType chunkSize, IndexType* columns, RealType* values ) const; template< typename Vector > typename Vector::RealType chunkVectorProduct( const IndexType sliceOffset, const IndexType chunkIndex, const IndexType chunkSize, const Vector& vector ) const; IndexType chunksInSlice, desiredChunkSize; Loading @@ -237,22 +306,20 @@ class tnlChunkedEllpackMatrix : public tnlSparseMatrix< Real, Device, Index > tnlArray< ChunkedEllpackSliceInfo, Device, Index > slices; //IndexType numberOfSlices; IndexType numberOfSlices; typedef tnlChunkedEllpackMatrixDeviceDependentCode< DeviceType > DeviceDependentCode; friend class tnlChunkedEllpackMatrixDeviceDependentCode< DeviceType >; friend class tnlChunkedEllpackMatrix< RealType, tnlHost, IndexType >; friend class tnlChunkedEllpackMatrix< RealType, tnlCuda, IndexType >; /*#ifdef HAVE_CUDA friend void tnlChunkedEllpackMatrix_setSlices_CudaKernel< Real, Index, 256 >( tnlChunkedEllpackMatrix< Real, tnlCuda, Index >* matrix, const RowLengthsVector* rowLengths, const Index numberOfSlices, Index* elementsToAllocation, const Index gridIdx ); #endif*/ #ifdef HAVE_CUDA template< typename Vector > friend void tnlChunkedEllpackMatrixVectorProductCudaKernel( const tnlChunkedEllpackMatrix< Real, tnlCuda, Index >* matrix, const Vector* inVector, Vector* outVector, int gridIdx ); #endif }; #include <implementation/matrices/tnlChunkedEllpackMatrix_impl.h> Loading src/matrices/tnlSparseMatrix.h +3 −0 Original line number Diff line number Diff line Loading @@ -49,10 +49,13 @@ class tnlSparseMatrix : public tnlMatrix< Real, Device, Index > bool load( tnlFile& file ); void printStructure( ostream& str ) const; protected: bool allocateMatrixElements( const IndexType& numberOfMatrixElements ); tnlVector< Index, Device, Index > columnIndexes; }; Loading tests/unit-tests/matrices/tnlChunkedEllpackMatrixTest.cpp +1 −1 Original line number Diff line number Diff line Loading @@ -27,7 +27,7 @@ int main( int argc, char* argv[] ) { tnlSparseMatrixTester< tnlChunkedEllpackMatrix< float, tnlHost, int >, tnlChunkedEllpackMatrixTestSetup< 4, 2 > > tester; //tester.setElementTest(); //tester. vectorProduct_DiagonalMatrixTest(); #ifdef HAVE_CPPUNIT if( ! tnlUnitTestStarter :: run< tnlSparseMatrixTester< tnlChunkedEllpackMatrix< float, tnlHost, int >, tnlChunkedEllpackMatrixTestSetup< 4, 2 > > >() || Loading Loading
src/implementation/matrices/tnlChunkedEllpackMatrix_impl.h +449 −102 File changed.Preview size limit exceeded, changes collapsed. Show changes
src/implementation/matrices/tnlSparseMatrix_impl.h +7 −0 Original line number Diff line number Diff line Loading @@ -111,5 +111,12 @@ bool tnlSparseMatrix< Real, Device, Index >::allocateMatrixElements( const Index return true; } template< typename Real, typename Device, typename Index > void tnlSparseMatrix< Real, Device, Index >::printStructure( ostream& str ) const { } #endif /* TNLSPARSEMATRIX_IMPL_H_ */
src/matrices/tnlChunkedEllpackMatrix.h +79 −12 Original line number Diff line number Diff line Loading @@ -42,6 +42,16 @@ struct tnlChunkedEllpackSliceInfo { return tnlString( "tnlChunkedEllpackSliceInfo" ); }; }; #ifdef HAVE_CUDA template< typename Real, typename Index, typename Vector > __global__ void tnlChunkedEllpackMatrixVectorProductCudaKernel( const tnlChunkedEllpackMatrix< Real, tnlCuda, Index >* matrix, const Vector* inVector, Vector* outVector, int gridIdx ); #endif template< typename Real, typename Device, typename Index > class tnlChunkedEllpackMatrix : public tnlSparseMatrix< Real, Device, Index > { Loading Loading @@ -85,6 +95,11 @@ class tnlChunkedEllpackMatrix : public tnlSparseMatrix< Real, Device, Index > IndexType getDesiredChunkSize() const; #ifdef HAVE_CUDA __device__ __host__ #endif IndexType getNumberOfSlices() const; #ifdef HAVE_CUDA __device__ __host__ #endif Loading Loading @@ -165,6 +180,14 @@ class tnlChunkedEllpackMatrix : public tnlSparseMatrix< Real, Device, Index > typename Vector::RealType rowVectorProduct( const IndexType row, const Vector& vector ) const; #ifdef HAVE_CUDA template< typename Vector > __device__ void computeSliceVectorProduct( const Vector* inVector, Vector* outVector, int gridIdx ) const; #endif template< typename Vector > void vectorProduct( const Vector& inVector, Vector& outVector ) const; Loading Loading @@ -194,11 +217,12 @@ class tnlChunkedEllpackMatrix : public tnlSparseMatrix< Real, Device, Index > void print( ostream& str ) const; void printStructure( ostream& str ) const; protected: void resolveSliceSizes( const tnlVector< Index, tnlHost, Index >& rowLengths, IndexType& numberOfSlices ); void resolveSliceSizes( const tnlVector< Index, tnlHost, Index >& rowLengths ); bool setSlice( const RowLengthsVector& rowLengths, const IndexType sliceIdx, Loading @@ -211,6 +235,9 @@ class tnlChunkedEllpackMatrix : public tnlSparseMatrix< Real, Device, Index > RealType& value, RealType& thisElementMultiplicator ); #ifdef HAVE_CUDA __device__ __host__ #endif bool addElementToChunkFast( const IndexType sliceOffset, const IndexType chunkIndex, const IndexType chunkSize, Loading @@ -218,18 +245,60 @@ class tnlChunkedEllpackMatrix : public tnlSparseMatrix< Real, Device, Index > RealType& value, RealType& thisElementMultiplicator ); #ifdef HAVE_CUDA __device__ __host__ #endif void setChunkFast( const IndexType sliceOffset, const IndexType chunkIndex, const IndexType chunkSize, const IndexType* columnIndexes, const RealType* values, const IndexType elements ); void setChunk( const IndexType sliceOffset, const IndexType chunkIndex, const IndexType chunkSize, const IndexType* columnIndexes, const RealType* values, const IndexType elements ); bool getElementInChunk( const IndexType sliceOffset, const IndexType chunkIndex, const IndexType chunkSize, const IndexType column, RealType& value ) const; #ifdef HAVE_CUDA __device__ __host__ #endif bool getElementInChunkFast( const IndexType sliceOffset, const IndexType chunkIndex, const IndexType chunkSize, const IndexType column, RealType& value ) const; void getChunk( const IndexType sliceOffset, const IndexType chunkIndex, const IndexType chunkSize, IndexType* columns, RealType* values ) const; #ifdef HAVE_CUDA __device__ __host__ #endif void getChunkFast( const IndexType sliceOffset, const IndexType chunkIndex, const IndexType chunkSize, IndexType* columns, RealType* values ) const; template< typename Vector > typename Vector::RealType chunkVectorProduct( const IndexType sliceOffset, const IndexType chunkIndex, const IndexType chunkSize, const Vector& vector ) const; IndexType chunksInSlice, desiredChunkSize; Loading @@ -237,22 +306,20 @@ class tnlChunkedEllpackMatrix : public tnlSparseMatrix< Real, Device, Index > tnlArray< ChunkedEllpackSliceInfo, Device, Index > slices; //IndexType numberOfSlices; IndexType numberOfSlices; typedef tnlChunkedEllpackMatrixDeviceDependentCode< DeviceType > DeviceDependentCode; friend class tnlChunkedEllpackMatrixDeviceDependentCode< DeviceType >; friend class tnlChunkedEllpackMatrix< RealType, tnlHost, IndexType >; friend class tnlChunkedEllpackMatrix< RealType, tnlCuda, IndexType >; /*#ifdef HAVE_CUDA friend void tnlChunkedEllpackMatrix_setSlices_CudaKernel< Real, Index, 256 >( tnlChunkedEllpackMatrix< Real, tnlCuda, Index >* matrix, const RowLengthsVector* rowLengths, const Index numberOfSlices, Index* elementsToAllocation, const Index gridIdx ); #endif*/ #ifdef HAVE_CUDA template< typename Vector > friend void tnlChunkedEllpackMatrixVectorProductCudaKernel( const tnlChunkedEllpackMatrix< Real, tnlCuda, Index >* matrix, const Vector* inVector, Vector* outVector, int gridIdx ); #endif }; #include <implementation/matrices/tnlChunkedEllpackMatrix_impl.h> Loading
src/matrices/tnlSparseMatrix.h +3 −0 Original line number Diff line number Diff line Loading @@ -49,10 +49,13 @@ class tnlSparseMatrix : public tnlMatrix< Real, Device, Index > bool load( tnlFile& file ); void printStructure( ostream& str ) const; protected: bool allocateMatrixElements( const IndexType& numberOfMatrixElements ); tnlVector< Index, Device, Index > columnIndexes; }; Loading
tests/unit-tests/matrices/tnlChunkedEllpackMatrixTest.cpp +1 −1 Original line number Diff line number Diff line Loading @@ -27,7 +27,7 @@ int main( int argc, char* argv[] ) { tnlSparseMatrixTester< tnlChunkedEllpackMatrix< float, tnlHost, int >, tnlChunkedEllpackMatrixTestSetup< 4, 2 > > tester; //tester.setElementTest(); //tester. vectorProduct_DiagonalMatrixTest(); #ifdef HAVE_CPPUNIT if( ! tnlUnitTestStarter :: run< tnlSparseMatrixTester< tnlChunkedEllpackMatrix< float, tnlHost, int >, tnlChunkedEllpackMatrixTestSetup< 4, 2 > > >() || Loading