Merge branch 'IK/matrices-csr' into 'develop' (9953d3ab) · Commits · TNL / tnl-dev

src/Benchmarks/BLAS/spmv.h

+6 −1

Original line number	Diff line number	Diff line
		@@ -27,6 +27,11 @@ namespace Benchmarks {
		template< typename Real, typename Device, typename Index >
		using SlicedEllpack = Matrices::Legacy::SlicedEllpack< Real, Device, Index >;

		// Legacy formats
		// Three-parameter alias of Legacy::CSR with the kernel fixed to CSRScalar, so that it
		// can be passed as the matrix template argument of benchmarkSpMV.
		template< typename Real, typename Device, typename Index >
		using SparseMatrixLegacy_CSR_Scalar = Matrices::Legacy::CSR< Real, Device, Index, Matrices::Legacy::CSRScalar >;


		template< typename Matrix >
		int setHostTestMatrix( Matrix& matrix,
		const int elementsPerRow )
		@@ -173,7 +178,7 @@ benchmarkSpmvSynthetic( Benchmark & benchmark,
		const int & elementsPerRow )
		{
		// TODO: benchmark all formats from tnl-benchmark-spmv (different parameters of the base formats)
		benchmarkSpMV< Real, Matrices::Legacy::CSR >( benchmark, size, elementsPerRow );
		benchmarkSpMV< Real, SparseMatrixLegacy_CSR_Scalar >( benchmark, size, elementsPerRow );
		benchmarkSpMV< Real, Matrices::Legacy::Ellpack >( benchmark, size, elementsPerRow );
		benchmarkSpMV< Real, SlicedEllpack >( benchmark, size, elementsPerRow );
		benchmarkSpMV< Real, Matrices::Legacy::ChunkedEllpack >( benchmark, size, elementsPerRow );

src/Benchmarks/SpMV/spmv-legacy.h

+21 −1

Original line number	Diff line number	Diff line
		@@ -61,6 +61,22 @@ using SlicedEllpackSegments = Containers::Segments::SlicedEllpack< Device, Index
		template< typename Real, typename Device, typename Index >
		using SparseMatrix_SlicedEllpack = Matrices::SparseMatrix< Real, Device, Index, Matrices::GeneralMatrix, SlicedEllpackSegments >;

		// Legacy formats
		// Legacy::CSR with the KernelType template parameter fixed to CSRScalar.
		template< typename Real, typename Device, typename Index >
		using SparseMatrixLegacy_CSR_Scalar = Matrices::Legacy::CSR< Real, Device, Index, Matrices::Legacy::CSRScalar >;

		// Legacy::CSR with the KernelType template parameter fixed to CSRVector.
		template< typename Real, typename Device, typename Index >
		using SparseMatrixLegacy_CSR_Vector = Matrices::Legacy::CSR< Real, Device, Index, Matrices::Legacy::CSRVector >;

		// Legacy::CSR with the KernelType template parameter fixed to CSRLight.
		template< typename Real, typename Device, typename Index >
		using SparseMatrixLegacy_CSR_Light = Matrices::Legacy::CSR< Real, Device, Index, Matrices::Legacy::CSRLight >;

		// Legacy::CSR with the KernelType template parameter fixed to CSRAdaptive.
		template< typename Real, typename Device, typename Index >
		using SparseMatrixLegacy_CSR_Adaptive = Matrices::Legacy::CSR< Real, Device, Index, Matrices::Legacy::CSRAdaptive >;

		// Legacy::CSR with the KernelType template parameter fixed to CSRStream.
		template< typename Real, typename Device, typename Index >
		using SparseMatrixLegacy_CSR_Stream = Matrices::Legacy::CSR< Real, Device, Index, Matrices::Legacy::CSRStream >;

		// Get the name (with extension) of input matrix file
		std::string getMatrixFileName( const String& InputFileName )
		{
		@@ -259,7 +275,11 @@ benchmarkSpmvSynthetic( Benchmark& benchmark,
		benchmark.time< Devices::Cuda >( resetCusparseVectors, "GPU", spmvCusparse );
		#endif

		benchmarkSpMV< Real, Matrices::Legacy::CSR >( benchmark, hostOutVector, inputFileName, verboseMR );
		benchmarkSpMV< Real, SparseMatrixLegacy_CSR_Scalar >( benchmark, hostOutVector, inputFileName, verboseMR );
		benchmarkSpMV< Real, SparseMatrixLegacy_CSR_Vector >( benchmark, hostOutVector, inputFileName, verboseMR );
		benchmarkSpMV< Real, SparseMatrixLegacy_CSR_Light >( benchmark, hostOutVector, inputFileName, verboseMR );
		benchmarkSpMV< Real, SparseMatrixLegacy_CSR_Adaptive >( benchmark, hostOutVector, inputFileName, verboseMR );
		benchmarkSpMV< Real, SparseMatrixLegacy_CSR_Stream >( benchmark, hostOutVector, inputFileName, verboseMR );
		benchmarkSpMV< Real, SparseMatrix_CSR >( benchmark, hostOutVector, inputFileName, verboseMR );
		benchmarkSpMV< Real, Matrices::Legacy::Ellpack >( benchmark, hostOutVector, inputFileName, verboseMR );
		benchmarkSpMV< Real, SparseMatrix_Ellpack >( benchmark, hostOutVector, inputFileName, verboseMR );

src/Python/pytnl/tnl/SparseMatrix.cpp

+6 −5

Original line number	Diff line number	Diff line
		@@ -16,14 +16,15 @@ using SE_cuda = TNL::Matrices::Legacy::SlicedEllpack< double, TNL::Devices::Cuda

		// Registers the sparse matrix bindings on module m: calls export_Matrix for each
		// matrix format and defines the copySparseMatrix conversions between formats.
		void export_SparseMatrices( py::module & m )
		{
		export_Matrix< CSR_host >( m, "CSR" );
		// TODO: This stopped working after the KernelType template parameter was added to Legacy::CSR.
		//export_Matrix< CSR_host >( m, "CSR" );
		export_Matrix< E_host >( m, "Ellpack" );
		export_Matrix< SE_host >( m, "SlicedEllpack" );

		m.def("copySparseMatrix", &TNL::Matrices::copySparseMatrix< CSR_host, E_host >);
		m.def("copySparseMatrix", &TNL::Matrices::copySparseMatrix< E_host, CSR_host >);
		m.def("copySparseMatrix", &TNL::Matrices::copySparseMatrix< CSR_host, SE_host >);
		m.def("copySparseMatrix", &TNL::Matrices::copySparseMatrix< SE_host, CSR_host >);
		//m.def("copySparseMatrix", &TNL::Matrices::copySparseMatrix< CSR_host, E_host >);
		//m.def("copySparseMatrix", &TNL::Matrices::copySparseMatrix< E_host, CSR_host >);
		//m.def("copySparseMatrix", &TNL::Matrices::copySparseMatrix< CSR_host, SE_host >);
		//m.def("copySparseMatrix", &TNL::Matrices::copySparseMatrix< SE_host, CSR_host >);
		m.def("copySparseMatrix", &TNL::Matrices::copySparseMatrix< E_host, SE_host >);
		m.def("copySparseMatrix", &TNL::Matrices::copySparseMatrix< SE_host, E_host >);
		}

src/TNL/Matrices/Legacy/CSR.h

+41 −20

Original line number	Diff line number	Diff line
		@@ -31,7 +31,9 @@ class CusparseCSR;
		template< typename Device >
		class CSRDeviceDependentCode;

		template< typename Real, typename Device = Devices::Host, typename Index = int >
		// Kernel variants selectable through the KernelType template parameter of CSR.
		enum CSRKernel { CSRScalar, CSRVector, CSRHybrid, CSRLight, CSRAdaptive, CSRStream };

		template< typename Real, typename Device = Devices::Host, typename Index = int, CSRKernel KernelType = CSRScalar >
		class CSR : public Sparse< Real, Device, Index >
		{
		private:
		@@ -40,7 +42,7 @@ private:
		using Enabler = std::enable_if< ! std::is_same< Device2, Device >::value >;

		// friend class will be needed for templated assignment operators
		template< typename Real2, typename Device2, typename Index2 >
		template< typename Real2, typename Device2, typename Index2, CSRKernel KernelType2 >
		friend class CSR;

		public:
		@@ -60,7 +62,10 @@ public:
		typename _Index = Index >
		using Self = CSR< _Real, _Device, _Index >;

		enum SPMVCudaKernel { scalar, vector, hybrid };
		// Returns the kernel variant chosen at compile time by the KernelType template
		// parameter. Declared const because a constexpr member function is not implicitly
		// const since C++14, so the getter would otherwise be unusable on const matrices.
		constexpr CSRKernel getSpMVKernelType() const { return KernelType; }
		//enum SPMVCudaKernel { scalar, vector, hybrid };

		using Sparse< Real, Device, Index >::getAllocatedElementsCount;

		CSR();

		@@ -85,8 +90,8 @@ public:
		__cuda_callable__
		IndexType getNonZeroRowLengthFast( const IndexType row ) const;

		template< typename Real2, typename Device2, typename Index2 >
		void setLike( const CSR< Real2, Device2, Index2 >& matrix );
		template< typename Real2, typename Device2, typename Index2, CSRKernel KernelType2 >
		void setLike( const CSR< Real2, Device2, Index2, KernelType2 >& matrix );

		void reset();

		@@ -165,13 +170,13 @@ public:
		OutVector& outVector ) const;
		// TODO: add const RealType& multiplicator = 1.0 )

		template< typename Real2, typename Index2 >
		void addMatrix( const CSR< Real2, Device, Index2 >& matrix,
		template< typename Real2, typename Index2, CSRKernel KernelType2 >
		void addMatrix( const CSR< Real2, Device, Index2, KernelType2 >& matrix,
		const RealType& matrixMultiplicator = 1.0,
		const RealType& thisMatrixMultiplicator = 1.0 );

		template< typename Real2, typename Index2 >
		void getTransposition( const CSR< Real2, Device, Index2 >& matrix,
		template< typename Real2, typename Index2, CSRKernel KernelType2 >
		void getTransposition( const CSR< Real2, Device, Index2, KernelType2 >& matrix,
		const RealType& matrixMultiplicator = 1.0 );

		template< typename Vector1, typename Vector2 >
		@@ -184,9 +189,9 @@ public:
		CSR& operator=( const CSR& matrix );

		// cross-device copy assignment
		template< typename Real2, typename Device2, typename Index2,
		template< typename Real2, typename Device2, typename Index2, CSRKernel KernelType2,
		typename = typename Enabler< Device2 >::type >
		CSR& operator=( const CSR< Real2, Device2, Index2 >& matrix );
		CSR& operator=( const CSR< Real2, Device2, Index2, KernelType2 >& matrix );

		void save( File& file ) const;

		@@ -198,10 +203,10 @@ public:

		void print( std::ostream& str ) const;

		void setCudaKernelType( const SPMVCudaKernel kernel );
		//void setCudaKernelType( const SPMVCudaKernel kernel );

		__cuda_callable__
		SPMVCudaKernel getCudaKernelType() const;
		//__cuda_callable__
		//SPMVCudaKernel getCudaKernelType() const;

		void setCudaWarpSize( const int warpSize );

		@@ -220,17 +225,33 @@ public:
		__device__
		void spmvCudaVectorized( const InVector& inVector,
		OutVector& outVector,
		const IndexType warpStart,
		const IndexType warpEnd,
		const IndexType inWarpIdx ) const;
		const IndexType gridIdx ) const;

		template< typename InVector,
		typename OutVector,
		int warpSize >
		__device__
		void vectorProductCuda( const InVector& inVector,
		OutVector& outVector,
		int gridIdx, int *blocks, size_t size ) const;

		template< typename InVector,
		typename OutVector,
		int warpSize >
		__device__
		void spmvCudaLightSpmv( const InVector& inVector,
		OutVector& outVector,
		int gridIdx) const;

		template< typename InVector,
		typename OutVector,
		int warpSize >
		__device__
		void spmvCSRAdaptive( const InVector& inVector,
		OutVector& outVector,
		int gridIdx,
		int *blocks,
		size_t blocks_size) const;
		#endif

		// The following getters allow us to interface TNL with external C-like
		@@ -263,7 +284,7 @@ protected:

		Containers::Vector< Index, Device, Index > rowPointers;

		SPMVCudaKernel spmvCudaKernel;
		//SPMVCudaKernel spmvCudaKernel;

		int cudaWarpSize, hybridModeSplit;

src/TNL/Matrices/Legacy/CSR_impl.h

+459 −167

File changed.

Preview size limit exceeded, changes collapsed.