Commit 631f61f4 authored by Tomáš Oberhuber's avatar Tomáš Oberhuber
Browse files

Implementing variable group size in tnlRgCSRMatrix.

parent 148b2f63
Loading
Loading
Loading
Loading
+1 −0
Original line number Diff line number Diff line
@@ -984,3 +984,4 @@ bool tnlCSRMatrix< Real, Device, Index > :: read( istream& file,
}

#endif /* TNLCSRMATRIX_H_ */
+219 −137

File changed.

Preview size limit exceeded, changes collapsed.

+6 −3
Original line number Diff line number Diff line
@@ -117,7 +117,9 @@ sparse_matrix_benchmark_LINK = $(LIBTOOL) --tag=CXX $(AM_LIBTOOLFLAGS) \
	$(sparse_matrix_benchmark_CXXFLAGS) $(CXXFLAGS) $(AM_LDFLAGS) \
	$(LDFLAGS) -o $@
am__sparse_matrix_benchmark_dbg_SOURCES_DIST =  \
	sparse-matrix-benchmark.h sparse-matrix-benchmark.cpp
	sparse-matrix-benchmark.h tnlSpmvBenchmark.h \
	tnlSpmvBenchmarkCSRMatrix.h tnlSpmvBenchmarkHybridMatrix.h \
	tnlSpmvBenchmarkRgCSRMatrix.h sparse-matrix-benchmark.cpp
am__objects_6 =  \
	sparse_matrix_benchmark_dbg-sparse-matrix-benchmark.$(OBJEXT)
@BUILD_DBG_TRUE@am_sparse_matrix_benchmark_dbg_OBJECTS =  \
@@ -345,14 +347,15 @@ SUBDIRS = unit-tests

#if BUILD_CUDA
#matrix_formats_test_sources += matrix-formats-test-cuda.cu
#sparse_matrix_benchmark_sources += sparse-matrix-benchmark-cuda.cu \
#                                   cusp-test.h
#sparse_matrix_benchmark_sources += sparse-matrix-benchmark-cuda.cu                                    
#matrix_solvers_benchmark_sources += matrix-solvers-benchmark-cuda.cu                                   
#tnl_benchmarks_sources += tnl-benchmarks-cuda.cu                               
#else
matrix_formats_test_sources = matrix-formats-test.h \
	matrix-formats-test.cpp
sparse_matrix_benchmark_sources = sparse-matrix-benchmark.h \
	tnlSpmvBenchmark.h tnlSpmvBenchmarkCSRMatrix.h \
	tnlSpmvBenchmarkHybridMatrix.h tnlSpmvBenchmarkRgCSRMatrix.h \
	sparse-matrix-benchmark.cpp
matrix_solvers_benchmark_sources = matrix-solvers-benchmark.h \
	matrix-solvers-benchmark.cpp
+1 −36
Original line number Diff line number Diff line
@@ -36,24 +36,7 @@ do
      else
         echo "Matrix $unzipped_matrix.float.bin.bz2 was already converted."         
      fi
      #######
      ## Descend ordering
      ###
      # Produce the single-precision binary of the descend-ordered matrix unless it
      # already exists. The reordered text matrix is reused if a previous run left it
      # behind.
      if test ! -e "$unzipped_matrix.descend.float.bin.bz2";
      then
         #echo "Converting $unzipped_matrix.descend.float.bin.bz2 ..."
         # Unpack the source matrix; -f overwrites any stale unpacked copy.
         gunzip -f "$matrix"
         if test ! -e "$unzipped_matrix.descend";
         then
            echo "Ordering the matrix ..."
            ./reorder-csr -D -g 32 -i "$unzipped_matrix" -o "$unzipped_matrix.descend"
         fi
         # $TNL_MATRIX_CONVERT is deliberately left unquoted: it is set outside this
         # block and may carry extra arguments.
         $TNL_MATRIX_CONVERT --input-file "$unzipped_matrix.descend" --output-file "$unzipped_matrix.descend.float.bin.bz2" --precision float --verbose yes --verify no
         # The reordered text matrix is kept on purpose (removal stays disabled).
         #rm $unzipped_matrix.descend
         # Re-compress the source matrix that was unpacked above.
         gzip "$unzipped_matrix"
      else
         # Report the same file name that the test above checked.
         echo "Matrix $unzipped_matrix.descend.float.bin.bz2 was already converted."
      fi
      
      #######
      ## AMD ordering
      ###      
@@ -83,24 +66,6 @@ do
         echo "Matrix $unzipped_matrix.double.bin.bz2 was already converted."         
      fi

      #######
      ## Descend ordering
      ###
      # Produce the double-precision binary of the descend-ordered matrix unless it
      # already exists. The reordered text matrix is reused if a previous run left it
      # behind.
      if test ! -e "$unzipped_matrix.descend.double.bin.bz2";
      then
         #echo "Converting $unzipped_matrix.descend.double.bin.bz2 ..."
         # Unpack the source matrix; -f overwrites any stale unpacked copy.
         gunzip -f "$matrix"
         if test ! -e "$unzipped_matrix.descend";
         then
            echo "Ordering the matrix ..."
            ./reorder-csr -D -g 32 -i "$unzipped_matrix" -o "$unzipped_matrix.descend"
         fi
         # $TNL_MATRIX_CONVERT is deliberately left unquoted: it is set outside this
         # block and may carry extra arguments.
         $TNL_MATRIX_CONVERT --input-file "$unzipped_matrix.descend" --output-file "$unzipped_matrix.descend.double.bin.bz2" --precision double --verbose yes --verify no
         # The reordered text matrix is kept on purpose (removal stays disabled).
         #rm $unzipped_matrix.descend
         # Re-compress the source matrix that was unpacked above.
         gzip "$unzipped_matrix"
      else
         echo "Matrix $unzipped_matrix.descend.double.bin.bz2 was already converted."
      fi
      #######
      ## AMD ordering
      ###      
+130 −39
Original line number Diff line number Diff line
@@ -35,16 +35,24 @@ write_header()
   # Top header row: one cell per benchmarked matrix format. Each Row-Grouped CSR
   # variant spans 33 columns. Output is appended to the file named by $1.
   echo "             <td rowspan=4 align=center>CSR</td>" >> $1
   echo "             <td rowspan=4 colspan=2 align=center>Hybrid</td>" >> $1
   echo "             <td colspan=33 align=center>Row-Grouped CSR</td>" >> $1
   echo "             <td colspan=33 align=center>Row-Grouped CSR (Rows sorted decreasingly by the number of the non-zero elements)</td>" >> $1
   echo "             <td colspan=33 align=center>Row-Grouped CSR with adaptive group size set by average number of nonzeros in row</td>" >> $1
   echo "             <td colspan=33 align=center>Row-Grouped CSR with rows sorted decreasingly by the number of the nonzeros</td>" >> $1
   echo "             <td colspan=33 align=center>Row-Grouped CSR with rows sorted decreasingly by the number of the nonzeros and adaptive group size set by the first group</td>" >> $1
   echo "          </tr>" >> $1
   
   echo "          <tr>" >> $1
   echo "             <td colspan=11>Group Size = 16</td>" >> $1      # RgCSR
   echo "             <td colspan=11>Group Size = 32</td>" >> $1
   echo "             <td colspan=11>Group Size = 64</td>" >> $1
   echo "             <td colspan=11>Group Size cca. 16</td>" >> $1      # RgCSR adaptive group size
   echo "             <td colspan=11>Group Size cca. 32</td>" >> $1
   echo "             <td colspan=11>Group Size cca. 64</td>" >> $1   
   echo "             <td colspan=11>Group Size = 16</td>" >> $1      # RgCSR rows sorted decreasingly
   echo "             <td colspan=11>Group Size = 32</td>" >> $1
   echo "             <td colspan=11>Group Size = 64</td>" >> $1
   echo "             <td colspan=11>Group Size >= 16</td>" >> $1      # RgCSR rows sorted decreasingly, adaptive group size
   echo "             <td colspan=11>Group Size >= 32</td>" >> $1
   echo "             <td colspan=11>Group Size >= 64</td>" >> $1                     
   echo "          </tr>" >> $1
   
   echo "          <tr>" >> $1
@@ -58,6 +66,16 @@ write_header()
   echo "             <td rowspan=2 colspan=2>CPU</td>" >> $1
   echo "             <td colspan=8>GPU</td>" >> $1

   echo "             <td rowspan=2></td>" >> $1                         # RgCSR format with the group size cca. 16
   echo "             <td rowspan=2 colspan=2>CPU</td>" >> $1
   echo "             <td colspan=8>GPU</td>" >> $1
   echo "             <td rowspan=2></td>" >> $1                         # RgCSR format with the group size cca. 32
   echo "             <td rowspan=2 colspan=2>CPU</td>" >> $1
   echo "             <td colspan=8>GPU</td>" >> $1   
   echo "             <td rowspan=2></td>" >> $1                         # RgCSR format with the group size cca. 64
   echo "             <td rowspan=2 colspan=2>CPU</td>" >> $1
   echo "             <td colspan=8>GPU</td>" >> $1
   
   echo "             <td rowspan=2></td>" >> $1                         # RgCSR (sorted rows) format with the group size = 16
   echo "             <td rowspan=2 colspan=2>CPU</td>" >> $1
   echo "             <td colspan=8>GPU</td>" >> $1
@@ -67,6 +85,17 @@ write_header()
   echo "             <td rowspan=2></td>" >> $1                         # RgCSR (sorted rows) format with the group size = 64
   echo "             <td rowspan=2 colspan=2>CPU</td>" >> $1
   echo "             <td colspan=8>GPU</td>" >> $1
      
   echo "             <td rowspan=2></td>" >> $1                         # RgCSR (sorted rows) format with the group size >= 16
   echo "             <td rowspan=2 colspan=2>CPU</td>" >> $1
   echo "             <td colspan=8>GPU</td>" >> $1
   echo "             <td rowspan=2></td>" >> $1                         # RgCSR (sorted rows) format with the group size >= 32
   echo "             <td rowspan=2 colspan=2>CPU</td>" >> $1
   echo "             <td colspan=8>GPU</td>" >> $1   
   echo "             <td rowspan=2></td>" >> $1                         # RgCSR (sorted rows) format with the group size >= 64
   echo "             <td rowspan=2 colspan=2>CPU</td>" >> $1
   echo "             <td colspan=8>GPU</td>" >> $1            
                  
   echo "          </tr>" >> $1
   
   echo "          <tr>" >> $1
@@ -83,6 +112,20 @@ write_header()
   echo "             <td colspan=2>CUDA Block Size = 128</td>" >> $1
   echo "             <td colspan=2>CUDA Block Size = 256</td>" >> $1
   
   echo "             <td colspan=2>CUDA Block Size = 32</td>" >> $1        # RgCSR format with the group size cca 16
   echo "             <td colspan=2>CUDA Block Size = 64</td>" >> $1
   echo "             <td colspan=2>CUDA Block Size = 128</td>" >> $1
   echo "             <td colspan=2>CUDA Block Size = 256</td>" >> $1
   echo "             <td colspan=2>CUDA Block Size = 32</td>" >> $1        # RgCSR format with the group size cca 32
   echo "             <td colspan=2>CUDA Block Size = 64</td>" >> $1
   echo "             <td colspan=2>CUDA Block Size = 128</td>" >> $1
   echo "             <td colspan=2>CUDA Block Size = 256</td>" >> $1         
   echo "             <td colspan=2>CUDA Block Size = 32</td>" >> $1        # RgCSR format with the group size cca 64
   echo "             <td colspan=2>CUDA Block Size = 64</td>" >> $1
   echo "             <td colspan=2>CUDA Block Size = 128</td>" >> $1
   echo "             <td colspan=2>CUDA Block Size = 256</td>" >> $1
   
   
   echo "             <td colspan=2>CUDA Block Size = 32</td>" >> $1        # RgCSR (sorted rows) format with the group size = 16
   echo "             <td colspan=2>CUDA Block Size = 64</td>" >> $1
   echo "             <td colspan=2>CUDA Block Size = 128</td>" >> $1
@@ -96,6 +139,19 @@ write_header()
   echo "             <td colspan=2>CUDA Block Size = 128</td>" >> $1
   echo "             <td colspan=2>CUDA Block Size = 256</td>" >> $1
   
   echo "             <td colspan=2>CUDA Block Size = 32</td>" >> $1        # RgCSR (sorted rows) format with the group size >= 16
   echo "             <td colspan=2>CUDA Block Size = 64</td>" >> $1
   echo "             <td colspan=2>CUDA Block Size = 128</td>" >> $1
   echo "             <td colspan=2>CUDA Block Size = 256</td>" >> $1
   echo "             <td colspan=2>CUDA Block Size = 32</td>" >> $1        # RgCSR (sorted rows) format with the group size >= 32
   echo "             <td colspan=2>CUDA Block Size = 64</td>" >> $1
   echo "             <td colspan=2>CUDA Block Size = 128</td>" >> $1
   echo "             <td colspan=2>CUDA Block Size = 256</td>" >> $1         
   echo "             <td colspan=2>CUDA Block Size = 32</td>" >> $1        # RgCSR (sorted rows) format with the group size >= 64
   echo "             <td colspan=2>CUDA Block Size = 64</td>" >> $1
   echo "             <td colspan=2>CUDA Block Size = 128</td>" >> $1
   echo "             <td colspan=2>CUDA Block Size = 256</td>" >> $1                                          
            
   echo "          </tr>" >> $1      
         
   echo "          <tr>" >> $1
@@ -104,8 +160,10 @@ write_header()
   echo "             <td>NonZeros No.</td>" >> $1
   echo "             <td>NonZeros %</td>" >> $1
   echo "             <td>GFlops</td>" >> $1                    # CSR format on CPU
   
   echo "             <td>GFlops</td>" >> $1                    # Hybrid format Bell, Garland on GPU
   echo "             <td>Speed-up</td>" >> $1
   
   echo "             <td>Artificial Zeros</td>" >> $1          # RgCSR format with the group size = 16
   echo "             <td>GFlops</td>" >> $1
   echo "             <td>Speed-up</td>" >> $1
@@ -140,6 +198,40 @@ write_header()
   echo "             <td>GFlops</td>" >> $1
   echo "             <td>Speed-up</td>" >> $1
   
   echo "             <td>Artificial Zeros</td>" >> $1          # RgCSR format with the group size cca 16
   echo "             <td>GFlops</td>" >> $1
   echo "             <td>Speed-up</td>" >> $1
   echo "             <td>GFlops</td>" >> $1
   echo "             <td>Speed-up</td>" >> $1
   echo "             <td>GFlops</td>" >> $1
   echo "             <td>Speed-up</td>" >> $1
   echo "             <td>GFlops</td>" >> $1
   echo "             <td>Speed-up</td>" >> $1
   echo "             <td>GFlops</td>" >> $1
   echo "             <td>Speed-up</td>" >> $1      
   echo "             <td>Artificial Zeros</td>" >> $1          # RgCSR format with the group size cca 32
   echo "             <td>GFlops</td>" >> $1
   echo "             <td>Speed-up</td>" >> $1
   echo "             <td>GFlops</td>" >> $1
   echo "             <td>Speed-up</td>" >> $1
   echo "             <td>GFlops</td>" >> $1
   echo "             <td>Speed-up</td>" >> $1
   echo "             <td>GFlops</td>" >> $1
   echo "             <td>Speed-up</td>" >> $1
   echo "             <td>GFlops</td>" >> $1
   echo "             <td>Speed-up</td>" >> $1      
   echo "             <td>Artificial Zeros</td>" >> $1          # RgCSR format with the group size cca 64
   echo "             <td>GFlops</td>" >> $1
   echo "             <td>Speed-up</td>" >> $1
   echo "             <td>GFlops</td>" >> $1
   echo "             <td>Speed-up</td>" >> $1
   echo "             <td>GFlops</td>" >> $1
   echo "             <td>Speed-up</td>" >> $1
   echo "             <td>GFlops</td>" >> $1
   echo "             <td>Speed-up</td>" >> $1
   echo "             <td>GFlops</td>" >> $1
   echo "             <td>Speed-up</td>" >> $1
   
   echo "             <td>Artificial Zeros</td>" >> $1          # RgCSR (sorted rows) format with the group size = 16
   echo "             <td>GFlops</td>" >> $1
   echo "             <td>Speed-up</td>" >> $1
@@ -174,6 +266,39 @@ write_header()
   echo "             <td>GFlops</td>" >> $1
   echo "             <td>Speed-up</td>" >> $1
   
   echo "             <td>Artificial Zeros</td>" >> $1          # RgCSR (sorted rows) format with the group size >= 16
   echo "             <td>GFlops</td>" >> $1
   echo "             <td>Speed-up</td>" >> $1
   echo "             <td>GFlops</td>" >> $1
   echo "             <td>Speed-up</td>" >> $1
   echo "             <td>GFlops</td>" >> $1
   echo "             <td>Speed-up</td>" >> $1
   echo "             <td>GFlops</td>" >> $1
   echo "             <td>Speed-up</td>" >> $1
   echo "             <td>GFlops</td>" >> $1
   echo "             <td>Speed-up</td>" >> $1      
   echo "             <td>Artificial Zeros</td>" >> $1          # RgCSR (sorted rows) format with the group size >= 32
   echo "             <td>GFlops</td>" >> $1
   echo "             <td>Speed-up</td>" >> $1
   echo "             <td>GFlops</td>" >> $1
   echo "             <td>Speed-up</td>" >> $1
   echo "             <td>GFlops</td>" >> $1
   echo "             <td>Speed-up</td>" >> $1
   echo "             <td>GFlops</td>" >> $1
   echo "             <td>Speed-up</td>" >> $1
   echo "             <td>GFlops</td>" >> $1
   echo "             <td>Speed-up</td>" >> $1      
   echo "             <td>Artificial Zeros</td>" >> $1          # RgCSR (sorted rows) format with the group size >= 64
   echo "             <td>GFlops</td>" >> $1
   echo "             <td>Speed-up</td>" >> $1
   echo "             <td>GFlops</td>" >> $1
   echo "             <td>Speed-up</td>" >> $1
   echo "             <td>GFlops</td>" >> $1
   echo "             <td>Speed-up</td>" >> $1
   echo "             <td>GFlops</td>" >> $1
   echo "             <td>Speed-up</td>" >> $1
   echo "             <td>GFlops</td>" >> $1
   echo "             <td>Speed-up</td>" >> $1
   
   
   
@@ -235,25 +360,7 @@ do
         $SPARSE_MATRIX_BENCHMARK --input-file $unzipped_matrix.double.bin.bz2 --input-mtx-file $unzipped_matrix --log-file sparse-matrix-benchmark-double.log.html --stop-time $STOP_TIME --verbose 1
         gzip $unzipped_matrix
      fi
      #######
      ## Descend ordering
      ###
      # Benchmark the descend-ordered variant of the matrix in single and then double
      # precision. A precision is skipped with a hint when its converted binary is
      # missing.
      if test ! -e "$unzipped_matrix.descend.float.bin.bz2";
      then
         echo "Missing $unzipped_matrix.descend.float.bin.bz2 !!! Run the script 'convert-matrices'."
      else
         echo "Checking with the matrix $unzipped_matrix.descend in single precision ..."
         # NOTE(review): presumably consumed by the CUDA profiler to name its log - confirm.
         export CUDA_PROFILE_LOG="$unzipped_matrix.descend.float.log"
         $SPARSE_MATRIX_BENCHMARK --input-file "$unzipped_matrix.descend.float.bin.bz2" --input-mtx-file "$unzipped_matrix.descend" --log-file sparse-matrix-benchmark-descend-float.log --stop-time $STOP_TIME --verbose 1
      fi
      if test ! -e "$unzipped_matrix.descend.double.bin.bz2";
      then
         echo "Missing $unzipped_matrix.descend.double.bin.bz2 !!! Run the script 'convert-matrices'."
      else
         echo "Checking with the matrix $unzipped_matrix.descend in double precision ..."
         export CUDA_PROFILE_LOG="$unzipped_matrix.descend.double.log"
         $SPARSE_MATRIX_BENCHMARK --input-file "$unzipped_matrix.descend.double.bin.bz2" --input-mtx-file "$unzipped_matrix.descend" --log-file sparse-matrix-benchmark-descend-double.log --stop-time $STOP_TIME --verbose 1
      fi

      #######
      ## AMD ordering
      ###  
@@ -309,23 +416,7 @@ do
            echo "Checking with the matrix $file ..."
            $SPARSE_MATRIX_BENCHMARK --input-file $file.double.bin.bz2 --input-mtx-file $file --log-file sparse-matrix-benchmark-double.log.html --stop-time $STOP_TIME --verbose 1                        
         fi                           
         #######
         ## Descend ordering
         ###
         # Benchmark the descend-ordered variant of $file in single and then double
         # precision. A precision is skipped with a hint when its converted binary is
         # missing.
         if test ! -e "$file.descend.float.bin.bz2";
         then
            echo "Missing $file.descend.float.bin.bz2 !!! Run the script 'convert-matrices'."
         else
            echo "Checking with the matrix $file.descend ..."
            $SPARSE_MATRIX_BENCHMARK --input-file "$file.descend.float.bin.bz2" --input-mtx-file "$file.descend" --log-file sparse-matrix-benchmark-descend-float.log --stop-time $STOP_TIME --verbose 1
         fi
         if test ! -e "$file.descend.double.bin.bz2";
         then
            echo "Missing $file.descend.double.bin.bz2 !!! Run the script 'convert-matrices'."
         else
            # Report the descend-ordered matrix that is actually benchmarked here.
            echo "Checking with the matrix $file.descend ..."
            $SPARSE_MATRIX_BENCHMARK --input-file "$file.descend.double.bin.bz2" --input-mtx-file "$file.descend" --log-file sparse-matrix-benchmark-descend-double.log --stop-time $STOP_TIME --verbose 1
         fi
         
         #######
         ## AMD ordering
         ###  
Loading