Implementing variable group size in tnlRgCSRMatrix. (631f61f4) · Commits · TNL / tnl-dev

src/matrix/tnlCSRMatrix.h

+1 −0

Original line number	Diff line number	Diff line
		@@ -984,3 +984,4 @@ bool tnlCSRMatrix< Real, Device, Index > :: read( istream& file,
		}

		#endif /* TNLCSRMATRIX_H_ */

src/matrix/tnlRgCSRMatrix.h

+219 −137

File changed.

Preview size limit exceeded, changes collapsed.

tests/Makefile.in

+6 −3

Original line number	Diff line number	Diff line
		@@ -117,7 +117,9 @@ sparse_matrix_benchmark_LINK = $(LIBTOOL) --tag=CXX $(AM_LIBTOOLFLAGS) \
		$(sparse_matrix_benchmark_CXXFLAGS) $(CXXFLAGS) $(AM_LDFLAGS) \
		$(LDFLAGS) -o $@
		am__sparse_matrix_benchmark_dbg_SOURCES_DIST = \
		sparse-matrix-benchmark.h sparse-matrix-benchmark.cpp
		sparse-matrix-benchmark.h tnlSpmvBenchmark.h \
		tnlSpmvBenchmarkCSRMatrix.h tnlSpmvBenchmarkHybridMatrix.h \
		tnlSpmvBenchmarkRgCSRMatrix.h sparse-matrix-benchmark.cpp
		am__objects_6 = \
		sparse_matrix_benchmark_dbg-sparse-matrix-benchmark.$(OBJEXT)
		@BUILD_DBG_TRUE@am_sparse_matrix_benchmark_dbg_OBJECTS = \
		@@ -345,14 +347,15 @@ SUBDIRS = unit-tests

		#if BUILD_CUDA
		#matrix_formats_test_sources += matrix-formats-test-cuda.cu
		#sparse_matrix_benchmark_sources += sparse-matrix-benchmark-cuda.cu \
		# cusp-test.h
		#sparse_matrix_benchmark_sources += sparse-matrix-benchmark-cuda.cu
		#matrix_solvers_benchmark_sources += matrix-solvers-benchmark-cuda.cu
		#tnl_benchmarks_sources += tnl-benchmarks-cuda.cu
		#else
		matrix_formats_test_sources = matrix-formats-test.h \
		matrix-formats-test.cpp
		sparse_matrix_benchmark_sources = sparse-matrix-benchmark.h \
		tnlSpmvBenchmark.h tnlSpmvBenchmarkCSRMatrix.h \
		tnlSpmvBenchmarkHybridMatrix.h tnlSpmvBenchmarkRgCSRMatrix.h \
		sparse-matrix-benchmark.cpp
		matrix_solvers_benchmark_sources = matrix-solvers-benchmark.h \
		matrix-solvers-benchmark.cpp

tests/convert-matrices

+1 −36

Original line number	Diff line number	Diff line
		@@ -36,24 +36,7 @@ do
		else
		echo "Matrix $unzipped_matrix.float.bin.bz2 was already converted."
		fi
		#######
		## Descend ordering
		###
		if test ! -e $unzipped_matrix.descend.float.bin.bz2;
		then
		#echo "Converting $unzipped_matrix.descend.float.bin.bz2 ..."
		gunzip -f $matrix
		if test ! -e $unzipped_matrix.descend;
		then
		echo "Ordering the matrix ..."
		./reorder-csr -D -g 32 -i $unzipped_matrix -o $unzipped_matrix.descend
		fi
		$TNL_MATRIX_CONVERT --input-file $unzipped_matrix.descend --output-file $unzipped_matrix.descend.float.bin.bz2 --precision float --verbose yes --verify no
		#rm $unzipped_matrix.descend
		gzip $unzipped_matrix
		else
		echo "Matrix $unzipped_matrix.descend.float.bin.bz2 was already converted."
		fi

		#######
		## AMD ordering
		###
		@@ -83,24 +66,6 @@ do
		echo "Matrix $unzipped_matrix.double.bin.bz2 was already converted."
		fi

		#######
		## Descend ordering
		###
		if test ! -e $unzipped_matrix.descend.double.bin.bz2;
		then
		#echo "Converting $unzipped_matrix.descend.double.bin.bz2 ..."
		gunzip -f $matrix
		if test ! -e $unzipped_matrix.descend;
		then
		echo "Ordering the matrix ..."
		./reorder-csr -D -g 32 -i $unzipped_matrix -o $unzipped_matrix.descend
		fi
		$TNL_MATRIX_CONVERT --input-file $unzipped_matrix.descend --output-file $unzipped_matrix.descend.double.bin.bz2 --precision double --verbose yes --verify no
		#rm $unzipped_matrix.descend
		gzip $unzipped_matrix
		else
		echo "Matrix $unzipped_matrix.descend.double.bin.bz2 was already converted."
		fi
		#######
		## AMD ordering
		###

tests/run-sparse-matrix-benchmark

+130 −39

Original line number	Diff line number	Diff line
		@@ -35,16 +35,24 @@ write_header()
		echo " <td rowspan=4 align=center>CSR</td>" >> $1
		echo " <td rowspan=4 colspan=2 align=center>Hybrid</td>" >> $1
		echo " <td colspan=33 align=center>Row-Grouped CSR</td>" >> $1
		echo " <td colspan=33 align=center>Row-Grouped CSR (Rows sorted decreasingly by the number of the non-zero elements)</td>" >> $1
		echo " <td colspan=33 align=center>Row-Grouped CSR with adaptive group size set by average number of nonzeros in row</td>" >> $1
		echo " <td colspan=33 align=center>Row-Grouped CSR with rows sorted decreasingly by the number of the nonzeros</td>" >> $1
		echo " <td colspan=33 align=center>Row-Grouped CSR with rows sorted decreasingly by the number of the nonzeros and adaptive group size set by the first group</td>" >> $1
		echo " </tr>" >> $1

		echo " <tr>" >> $1
		echo " <td colspan=11>Group Size = 16</td>" >> $1 # RgCSR
		echo " <td colspan=11>Group Size = 32</td>" >> $1
		echo " <td colspan=11>Group Size = 64</td>" >> $1
		echo " <td colspan=11>Group Size cca. 16</td>" >> $1 # RgCSR adaptive group size
		echo " <td colspan=11>Group Size cca. 32</td>" >> $1
		echo " <td colspan=11>Group Size cca. 64</td>" >> $1
		echo " <td colspan=11>Group Size = 16</td>" >> $1 # RgCSR rows sorted decreasingly
		echo " <td colspan=11>Group Size = 32</td>" >> $1
		echo " <td colspan=11>Group Size = 64</td>" >> $1
		echo " <td colspan=11>Group Size >= 16</td>" >> $1 # RgCSR rows sorted decreasingly, adaptive group size
		echo " <td colspan=11>Group Size >= 32</td>" >> $1
		echo " <td colspan=11>Group Size >= 64</td>" >> $1
		echo " </tr>" >> $1

		echo " <tr>" >> $1
		@@ -58,6 +66,16 @@ write_header()
		echo " <td rowspan=2 colspan=2>CPU</td>" >> $1
		echo " <td colspan=8>GPU</td>" >> $1

		echo " <td rowspan=2></td>" >> $1 # RgCSR format with the group size cca. 16
		echo " <td rowspan=2 colspan=2>CPU</td>" >> $1
		echo " <td colspan=8>GPU</td>" >> $1
		echo " <td rowspan=2></td>" >> $1 # RgCSR format with the group size cca. 32
		echo " <td rowspan=2 colspan=2>CPU</td>" >> $1
		echo " <td colspan=8>GPU</td>" >> $1
		echo " <td rowspan=2></td>" >> $1 # RgCSR format with the group size cca. 64
		echo " <td rowspan=2 colspan=2>CPU</td>" >> $1
		echo " <td colspan=8>GPU</td>" >> $1

		echo " <td rowspan=2></td>" >> $1 # RgCSR (sorted rows) format with the group size = 16
		echo " <td rowspan=2 colspan=2>CPU</td>" >> $1
		echo " <td colspan=8>GPU</td>" >> $1
		@@ -67,6 +85,17 @@ write_header()
		echo " <td rowspan=2></td>" >> $1 # RgCSR (sorted rows) format with the group size = 64
		echo " <td rowspan=2 colspan=2>CPU</td>" >> $1
		echo " <td colspan=8>GPU</td>" >> $1

		echo " <td rowspan=2></td>" >> $1 # RgCSR (sorted rows) format with the group size >= 16
		echo " <td rowspan=2 colspan=2>CPU</td>" >> $1
		echo " <td colspan=8>GPU</td>" >> $1
		echo " <td rowspan=2></td>" >> $1 # RgCSR (sorted rows) format with the group size >= 32
		echo " <td rowspan=2 colspan=2>CPU</td>" >> $1
		echo " <td colspan=8>GPU</td>" >> $1
		echo " <td rowspan=2></td>" >> $1 # RgCSR (sorted rows) format with the group size >= 64
		echo " <td rowspan=2 colspan=2>CPU</td>" >> $1
		echo " <td colspan=8>GPU</td>" >> $1

		echo " </tr>" >> $1

		echo " <tr>" >> $1
		@@ -83,6 +112,20 @@ write_header()
		echo " <td colspan=2>CUDA Block Size = 128</td>" >> $1
		echo " <td colspan=2>CUDA Block Size = 256</td>" >> $1

		echo " <td colspan=2>CUDA Block Size = 32</td>" >> $1 # RgCSR format with the group size cca 16
		echo " <td colspan=2>CUDA Block Size = 64</td>" >> $1
		echo " <td colspan=2>CUDA Block Size = 128</td>" >> $1
		echo " <td colspan=2>CUDA Block Size = 256</td>" >> $1
		echo " <td colspan=2>CUDA Block Size = 32</td>" >> $1 # RgCSR format with the group size cca 32
		echo " <td colspan=2>CUDA Block Size = 64</td>" >> $1
		echo " <td colspan=2>CUDA Block Size = 128</td>" >> $1
		echo " <td colspan=2>CUDA Block Size = 256</td>" >> $1
		echo " <td colspan=2>CUDA Block Size = 32</td>" >> $1 # RgCSR format with the group size cca 64
		echo " <td colspan=2>CUDA Block Size = 64</td>" >> $1
		echo " <td colspan=2>CUDA Block Size = 128</td>" >> $1
		echo " <td colspan=2>CUDA Block Size = 256</td>" >> $1


		echo " <td colspan=2>CUDA Block Size = 32</td>" >> $1 # RgCSR (sorted rows) format with the group size = 16
		echo " <td colspan=2>CUDA Block Size = 64</td>" >> $1
		echo " <td colspan=2>CUDA Block Size = 128</td>" >> $1
		@@ -96,6 +139,19 @@ write_header()
		echo " <td colspan=2>CUDA Block Size = 128</td>" >> $1
		echo " <td colspan=2>CUDA Block Size = 256</td>" >> $1

		echo " <td colspan=2>CUDA Block Size = 32</td>" >> $1 # RgCSR (sorted rows) format with the group size >= 16
		echo " <td colspan=2>CUDA Block Size = 64</td>" >> $1
		echo " <td colspan=2>CUDA Block Size = 128</td>" >> $1
		echo " <td colspan=2>CUDA Block Size = 256</td>" >> $1
		echo " <td colspan=2>CUDA Block Size = 32</td>" >> $1 # RgCSR (sorted rows) format with the group size >= 32
		echo " <td colspan=2>CUDA Block Size = 64</td>" >> $1
		echo " <td colspan=2>CUDA Block Size = 128</td>" >> $1
		echo " <td colspan=2>CUDA Block Size = 256</td>" >> $1
		echo " <td colspan=2>CUDA Block Size = 32</td>" >> $1 # RgCSR (sorted rows) format with the group size >= 64
		echo " <td colspan=2>CUDA Block Size = 64</td>" >> $1
		echo " <td colspan=2>CUDA Block Size = 128</td>" >> $1
		echo " <td colspan=2>CUDA Block Size = 256</td>" >> $1

		echo " </tr>" >> $1

		echo " <tr>" >> $1
		@@ -104,8 +160,10 @@ write_header()
		echo " <td>NonZeros No.</td>" >> $1
		echo " <td>NonZeros %</td>" >> $1
		echo " <td>GFlops</td>" >> $1 # CSR format on CPU

		echo " <td>GFlops</td>" >> $1 # Hybrid format Bell, Garland on GPU
		echo " <td>Speed-up</td>" >> $1

		echo " <td>Artificial Zeros</td>" >> $1 # RgCSR format with the group size = 16
		echo " <td>GFlops</td>" >> $1
		echo " <td>Speed-up</td>" >> $1
		@@ -140,6 +198,40 @@ write_header()
		echo " <td>GFlops</td>" >> $1
		echo " <td>Speed-up</td>" >> $1

		echo " <td>Artificial Zeros</td>" >> $1 # RgCSR format with the group size cca 16
		echo " <td>GFlops</td>" >> $1
		echo " <td>Speed-up</td>" >> $1
		echo " <td>GFlops</td>" >> $1
		echo " <td>Speed-up</td>" >> $1
		echo " <td>GFlops</td>" >> $1
		echo " <td>Speed-up</td>" >> $1
		echo " <td>GFlops</td>" >> $1
		echo " <td>Speed-up</td>" >> $1
		echo " <td>GFlops</td>" >> $1
		echo " <td>Speed-up</td>" >> $1
		echo " <td>Artificial Zeros</td>" >> $1 # RgCSR format with the group size cca 32
		echo " <td>GFlops</td>" >> $1
		echo " <td>Speed-up</td>" >> $1
		echo " <td>GFlops</td>" >> $1
		echo " <td>Speed-up</td>" >> $1
		echo " <td>GFlops</td>" >> $1
		echo " <td>Speed-up</td>" >> $1
		echo " <td>GFlops</td>" >> $1
		echo " <td>Speed-up</td>" >> $1
		echo " <td>GFlops</td>" >> $1
		echo " <td>Speed-up</td>" >> $1
		echo " <td>Artificial Zeros</td>" >> $1 # RgCSR format with the group size cca 64
		echo " <td>GFlops</td>" >> $1
		echo " <td>Speed-up</td>" >> $1
		echo " <td>GFlops</td>" >> $1
		echo " <td>Speed-up</td>" >> $1
		echo " <td>GFlops</td>" >> $1
		echo " <td>Speed-up</td>" >> $1
		echo " <td>GFlops</td>" >> $1
		echo " <td>Speed-up</td>" >> $1
		echo " <td>GFlops</td>" >> $1
		echo " <td>Speed-up</td>" >> $1

		echo " <td>Artificial Zeros</td>" >> $1 # RgCSR (sorted rows) format with the group size = 16
		echo " <td>GFlops</td>" >> $1
		echo " <td>Speed-up</td>" >> $1
		@@ -174,6 +266,39 @@ write_header()
		echo " <td>GFlops</td>" >> $1
		echo " <td>Speed-up</td>" >> $1

		echo " <td>Artificial Zeros</td>" >> $1 # RgCSR (sorted rows) format with the group size >= 16
		echo " <td>GFlops</td>" >> $1
		echo " <td>Speed-up</td>" >> $1
		echo " <td>GFlops</td>" >> $1
		echo " <td>Speed-up</td>" >> $1
		echo " <td>GFlops</td>" >> $1
		echo " <td>Speed-up</td>" >> $1
		echo " <td>GFlops</td>" >> $1
		echo " <td>Speed-up</td>" >> $1
		echo " <td>GFlops</td>" >> $1
		echo " <td>Speed-up</td>" >> $1
		echo " <td>Artificial Zeros</td>" >> $1 # RgCSR (sorted rows) format with the group size >= 32
		echo " <td>GFlops</td>" >> $1
		echo " <td>Speed-up</td>" >> $1
		echo " <td>GFlops</td>" >> $1
		echo " <td>Speed-up</td>" >> $1
		echo " <td>GFlops</td>" >> $1
		echo " <td>Speed-up</td>" >> $1
		echo " <td>GFlops</td>" >> $1
		echo " <td>Speed-up</td>" >> $1
		echo " <td>GFlops</td>" >> $1
		echo " <td>Speed-up</td>" >> $1
		echo " <td>Artificial Zeros</td>" >> $1 # RgCSR (sorted rows) format with the group size >= 64
		echo " <td>GFlops</td>" >> $1
		echo " <td>Speed-up</td>" >> $1
		echo " <td>GFlops</td>" >> $1
		echo " <td>Speed-up</td>" >> $1
		echo " <td>GFlops</td>" >> $1
		echo " <td>Speed-up</td>" >> $1
		echo " <td>GFlops</td>" >> $1
		echo " <td>Speed-up</td>" >> $1
		echo " <td>GFlops</td>" >> $1
		echo " <td>Speed-up</td>" >> $1



		@@ -235,25 +360,7 @@ do
		$SPARSE_MATRIX_BENCHMARK --input-file $unzipped_matrix.double.bin.bz2 --input-mtx-file $unzipped_matrix --log-file sparse-matrix-benchmark-double.log.html --stop-time $STOP_TIME --verbose 1
		gzip $unzipped_matrix
		fi
		#######
		## Descend ordering
		###
		if test ! -e $unzipped_matrix.descend.float.bin.bz2;
		then
		echo "Missing $unzipped_matrix.descend.float.bin.bz2 !!! Run the script 'convert-matrices'."
		else
		echo "Checking with the matrix $unzipped_matrix.descend in single precision ..."
		export CUDA_PROFILE_LOG=$unzipped_matrix.descend.float.log
		$SPARSE_MATRIX_BENCHMARK --input-file $unzipped_matrix.descend.float.bin.bz2 --input-mtx-file $unzipped_matrix.descend --log-file sparse-matrix-benchmark-descend-float.log --stop-time $STOP_TIME --verbose 1
		fi
		if test ! -e $unzipped_matrix.descend.double.bin.bz2;
		then
		echo "Missing $unzipped_matrix.descend.double.bin.bz2 !!! Run the script 'convert-matrices'."
		else
		echo "Checking with the matrix $unzipped_matrix.descend in double precision ..."
		export CUDA_PROFILE_LOG=$unzipped_matrix.descend.double.log
		$SPARSE_MATRIX_BENCHMARK --input-file $unzipped_matrix.descend.double.bin.bz2 --input-mtx-file $unzipped_matrix.descend --log-file sparse-matrix-benchmark-descend-double.log --stop-time $STOP_TIME --verbose 1
		fi

		#######
		## AMD ordering
		###
		@@ -309,23 +416,7 @@ do
		echo "Checking with the matrix $file ..."
		$SPARSE_MATRIX_BENCHMARK --input-file $file.double.bin.bz2 --input-mtx-file $file --log-file sparse-matrix-benchmark-double.log.html --stop-time $STOP_TIME --verbose 1
		fi
		#######
		## Descend ordering
		###
		if test ! -e $file.descend.float.bin.bz2;
		then
		echo "Missing $file.descend.float.bin.bz2 !!! Run the script 'convert-matrices'."
		else
		echo "Checking with the matrix $file.descend ..."
		$SPARSE_MATRIX_BENCHMARK --input-file $file.descend.float.bin.bz2 --input-mtx-file $file.descend --log-file sparse-matrix-benchmark-descend-float.log --stop-time $STOP_TIME --verbose 1
		fi
		if test ! -e $file.descend.double.bin.bz2;
		then
		echo "Missing $file.descend.double.bin.bz2 !!! Run the script 'convert-matrices'."
		else
		echo "Checking with the matrix $file ..."
		$SPARSE_MATRIX_BENCHMARK --input-file $file.descend.double.bin.bz2 --input-mtx-file $file.descend --log-file sparse-matrix-benchmark-descend-double.log --stop-time $STOP_TIME --verbose 1
		fi

		#######
		## AMD ordering
		###