Commit 9d4cf490 authored by Jakub Klinkovský
Browse files

working on the MHFEM section - added Hypre results

parent 43775289
Loading
Loading
Loading
Loading
+22 −10
Original line number Diff line number Diff line
@@ -8,17 +8,26 @@
%   \usepackage{stackengine}
%   \usepackage[np]{numprint}

\begin{tabular}{rN{5}{1} N{2}{1} N{1}{2} N{5}{1} N{2}{1} N{1}{2}}
\begin{tabular}{rN{5}{1} N{2}{1} N{1}{2} N{5}{1} N{2}{1} N{1}{2} N{4}{1} N{2}{1} N{1}{2}}
\toprule

% header row 0
  &  \multicolumn{6}{c}{TNL}
  &  \multicolumn{3}{c}{Hypre}
  \\
\cmidrule(lr){2-7}
\cmidrule(l){8-10}

% header row 1
  &  \multicolumn{3}{c}{OpenMP}
  &  \multicolumn{3}{c}{MPI}
  &  \multicolumn{3}{c}{MPI}
  \\
\cmidrule(lr){2-4}
\cmidrule(l){5-7}
\cmidrule(lr){5-7}
\cmidrule(l){8-10}

% header row 1
% header row 2
\multicolumn{1}{c}{Cores}
  &  \multicolumn{1}{c}{$ CT $}
  &  \multicolumn{1}{c}{$ Sp $}
@@ -26,6 +35,9 @@
  &  \multicolumn{1}{c}{$ CT $}
  &  \multicolumn{1}{c}{$ Sp $}
  &  \multicolumn{1}{c}{$ E\!f\!f $}
  &  \multicolumn{1}{c}{$ CT $}
  &  \multicolumn{1}{c}{$ Sp $}
  &  \multicolumn{1}{c}{$ E\!f\!f $}
  \\

\midrule
@@ -34,37 +46,37 @@
        $ \np{1} $
          &  

10743.9  &  1.0  &  1.00  &  10800.2  &  1.0  &  1.00 \\
10743.9  &  1.0  &  1.00  &  10800.2  &  1.0  &  1.00  &  3510.1  &  1.0  &  1.00 \\

        $ \np{2} $
          &  

6349.0  &  1.7  &  0.85  &  5693.5  &  1.9  &  0.95 \\
6349.0  &  1.7  &  0.85  &  5693.5  &  1.9  &  0.95  &  2058.0  &  1.7  &  0.85 \\

        $ \np{4} $
          &  

3375.9  &  3.2  &  0.80  &  3143.0  &  3.4  &  0.86 \\
3375.9  &  3.2  &  0.80  &  3143.0  &  3.4  &  0.86  &  1097.1  &  3.2  &  0.80 \\

        $ \np{6} $
          &  

2294.6  &  4.7  &  0.78  &  2506.0  &  4.3  &  0.72 \\
2294.6  &  4.7  &  0.78  &  2506.0  &  4.3  &  0.72  &  750.5  &  4.7  &  0.78 \\

        $ \np{8} $
          &  

1818.1  &  5.9  &  0.74  &  1787.6  &  6.0  &  0.76 \\
1818.1  &  5.9  &  0.74  &  1787.6  &  6.0  &  0.76  &  587.6  &  6.0  &  0.75 \\

        $ \np{12} $
          &  

1296.2  &  8.3  &  0.69  &  1096.8  &  9.8  &  0.82 \\
1296.2  &  8.3  &  0.69  &  1096.8  &  9.8  &  0.82  &  424.8  &  8.3  &  0.69 \\

        $ \np{24} $
          &  

977.0  &  11.0  &  0.46  &  549.3  &  19.7  &  0.82 \\
977.0  &  11.0  &  0.46  &  549.3  &  19.7  &  0.82  &  215.5  &  16.3  &  0.68 \\

\bottomrule
\end{tabular}
+39 −25
Original line number Diff line number Diff line
@@ -8,19 +8,30 @@
%   \usepackage{stackengine}
%   \usepackage[np]{numprint}

\begin{tabular}{rrrN{6}{1} N{2}{1} N{1}{2} N{6}{1} N{3}{1} N{1}{2}}
\begin{tabular}{rrrN{6}{1} N{2}{1} N{1}{2} N{6}{1} N{3}{1} N{1}{2} N{5}{1} N{3}{1} N{1}{2}}
\toprule

% header row 0
  &  
  &  
  &  \multicolumn{6}{c}{TNL}
  &  \multicolumn{3}{c}{Hypre}
  \\
\cmidrule(lr){4-9}
\cmidrule(l){10-12}

% header row 1
  &  
  &  
  &  \multicolumn{3}{c}{OpenMP}
  &  \multicolumn{3}{c}{MPI}
  &  \multicolumn{3}{c}{MPI}
  \\
\cmidrule(lr){4-6}
\cmidrule(l){7-9}
\cmidrule(lr){7-9}
\cmidrule(l){10-12}

% header row 1
% header row 2
\multicolumn{1}{c}{Cores}
  &  \multicolumn{1}{c}{CPUs}
  &  \multicolumn{1}{c}{Nodes}
@@ -30,76 +41,79 @@
  &  \multicolumn{1}{c}{$ CT $}
  &  \multicolumn{1}{c}{$ Sp $}
  &  \multicolumn{1}{c}{$ E\!f\!f $}
  &  \multicolumn{1}{c}{$ CT $}
  &  \multicolumn{1}{c}{$ Sp $}
  &  \multicolumn{1}{c}{$ E\!f\!f $}
  \\

\midrule


$ \np{1} $  &    &    &  
188243.0  &  1.0  &  1.00  &  188706.0  &  1.0  &  1.00 \\
188243.0  &  1.0  &  1.00  &  188706.0  &  1.0  &  1.00  &  37991.2  &  1.0  &  1.00 \\

$ \np{2} $  &    &    &  
102074.0  &  1.8  &  0.92  &  93659.1  &  2.0  &  1.01 \\
102074.0  &  1.8  &  0.92  &  93659.1  &  2.0  &  1.01  &  21170.2  &  1.8  &  0.90 \\

$ \np{4} $  &    &    &  
55937.6  &  3.4  &  0.84  &  49553.0  &  3.8  &  0.95 \\
55937.6  &  3.4  &  0.84  &  49553.0  &  3.8  &  0.95  &  11252.2  &  3.4  &  0.84 \\

$ \np{6} $  &    &    &  
40796.4  &  4.6  &  0.77  &  35594.3  &  5.3  &  0.88 \\
40796.4  &  4.6  &  0.77  &  35594.3  &  5.3  &  0.88  &  7798.1  &  4.9  &  0.81 \\

$ \np{8} $  &    &    &  
32026.3  &  5.9  &  0.73  &  28958.6  &  6.5  &  0.81 \\
32026.3  &  5.9  &  0.73  &  28958.6  &  6.5  &  0.81  &  6085.4  &  6.2  &  0.78 \\

$ \np{12} $  &  $ \np{1} $  &  $ 1/2 $  &  
26369.7  &  7.1  &  0.59  &  23839.0  &  7.9  &  0.66 \\
26369.7  &  7.1  &  0.59  &  23839.0  &  7.9  &  0.66  &  4708.8  &  8.1  &  0.67 \\

$ \np{24} $  &  $ \np{2} $  &  $ \np{1} $  &  
15695.0  &  12.0  &  0.50  &  12184.2  &  15.5  &  0.65 \\
15695.0  &  12.0  &  0.50  &  12184.2  &  15.5  &  0.65  &  2485.0  &  15.3  &  0.64 \\

$ \np{48} $  &  $ \np{4} $  &  $ \np{2} $  &  
  &    &    &  6171.4  &  30.6  &  0.64 \\
  &    &    &  6171.4  &  30.6  &  0.64  &  1249.1  &  30.4  &  0.63 \\

$ \np{72} $  &  $ \np{6} $  &  $ \np{3} $  &  
  &    &    &  4026.3  &  46.9  &  0.65 \\
  &    &    &  4026.3  &  46.9  &  0.65  &  880.2  &  43.2  &  0.60 \\

$ \np{96} $  &  $ \np{8} $  &  $ \np{4} $  &  
  &    &    &  3016.0  &  62.6  &  0.65 \\
  &    &    &  3016.0  &  62.6  &  0.65  &  592.3  &  64.1  &  0.67 \\

$ \np{120} $  &  $ \np{10} $  &  $ \np{5} $  &  
  &    &    &  2374.4  &  79.5  &  0.66 \\
  &    &    &  2374.4  &  79.5  &  0.66  &  471.2  &  80.6  &  0.67 \\

$ \np{144} $  &  $ \np{12} $  &  $ \np{6} $  &  
  &    &    &  1968.2  &  95.9  &  0.67 \\
  &    &    &  1968.2  &  95.9  &  0.67  &  415.8  &  91.4  &  0.63 \\

$ \np{168} $  &  $ \np{14} $  &  $ \np{7} $  &  
  &    &    &  1643.1  &  114.8  &  0.68 \\
  &    &    &  1643.1  &  114.8  &  0.68  &  372.2  &  102.1  &  0.61 \\

$ \np{192} $  &  $ \np{16} $  &  $ \np{8} $  &  
  &    &    &  1410.4  &  133.8  &  0.70 \\
  &    &    &  1410.4  &  133.8  &  0.70  &  310.7  &  122.3  &  0.64 \\

$ \np{216} $  &  $ \np{18} $  &  $ \np{9} $  &  
  &    &    &  1242.5  &  151.9  &  0.70 \\
  &    &    &  1242.5  &  151.9  &  0.70  &  277.5  &  136.9  &  0.63 \\

$ \np{240} $  &  $ \np{20} $  &  $ \np{10} $  &  
  &    &    &  1114.3  &  169.4  &  0.71 \\
  &    &    &  1114.3  &  169.4  &  0.71  &  240.3  &  158.1  &  0.66 \\

$ \np{264} $  &  $ \np{22} $  &  $ \np{11} $  &  
  &    &    &  1003.8  &  188.0  &  0.71 \\
  &    &    &  1003.8  &  188.0  &  0.71  &  251.5  &  151.0  &  0.57 \\

$ \np{288} $  &  $ \np{24} $  &  $ \np{12} $  &  
  &    &    &  924.2  &  204.2  &  0.71 \\
  &    &    &  924.2  &  204.2  &  0.71  &  223.9  &  169.7  &  0.59 \\

$ \np{312} $  &  $ \np{26} $  &  $ \np{13} $  &  
  &    &    &  860.5  &  219.3  &  0.70 \\
  &    &    &  860.5  &  219.3  &  0.70  &  202.9  &  187.2  &  0.60 \\

$ \np{336} $  &  $ \np{28} $  &  $ \np{14} $  &  
  &    &    &  807.3  &  233.8  &  0.70 \\
  &    &    &  807.3  &  233.8  &  0.70  &  201.9  &  188.2  &  0.56 \\

$ \np{360} $  &  $ \np{30} $  &  $ \np{15} $  &  
  &    &    &  761.6  &  247.8  &  0.69 \\
  &    &    &  761.6  &  247.8  &  0.69  &    &    &   \\

$ \np{384} $  &  $ \np{32} $  &  $ \np{16} $  &  
  &    &    &  702.4  &  268.7  &  0.70 \\
  &    &    &  702.4  &  268.7  &  0.70  &    &    &   \\

\bottomrule
\end{tabular}
+24 −7
Original line number Diff line number Diff line
@@ -8,18 +8,35 @@
%   \usepackage{stackengine}
%   \usepackage[np]{numprint}

\begin{tabular}{rN{3}{1} N{1}{1} N{1}{2} N{4}{1} N{1}{1} N{1}{2}}
\begin{tabular}{rN{3}{1} N{1}{1} N{1}{2} N{4}{1} N{1}{1} N{1}{2} N{3}{1} N{1}{1} N{1}{2} N{4}{1} N{1}{1} N{1}{2}}
\toprule

% header row 0
  &  \multicolumn{6}{c}{TNL}
  &  \multicolumn{6}{c}{Hypre}
  \\
\cmidrule(lr){2-7}
\cmidrule(l){8-13}

% header row 1
  &  \multicolumn{3}{c}{2D$^\triangle_5$}
  &  \multicolumn{3}{c}{3D$^\triangle_5$}
  &  \multicolumn{3}{c}{2D$^\triangle_5$}
  &  \multicolumn{3}{c}{3D$^\triangle_5$}
  \\
\cmidrule(lr){2-4}
\cmidrule(l){5-7}
\cmidrule(lr){5-7}
\cmidrule(lr){8-10}
\cmidrule(l){11-13}

% header row 1
% header row 2
\multicolumn{1}{c}{GPUs}
  &  \multicolumn{1}{c}{$ CT $}
  &  \multicolumn{1}{c}{$ Sp $}
  &  \multicolumn{1}{c}{$ E\!f\!f $}
  &  \multicolumn{1}{c}{$ CT $}
  &  \multicolumn{1}{c}{$ Sp $}
  &  \multicolumn{1}{c}{$ E\!f\!f $}
  &  \multicolumn{1}{c}{$ CT $}
  &  \multicolumn{1}{c}{$ Sp $}
  &  \multicolumn{1}{c}{$ E\!f\!f $}
@@ -34,22 +51,22 @@
        $ \np{1} $
          &  

528.6  &  1.0  &  1.00  &  2654.8  &  1.0  &  1.00 \\
528.6  &  1.0  &  1.00  &  2654.8  &  1.0  &  1.00  &  389.8  &  1.0  &  1.00  &  2014.5  &  1.0  &  1.00 \\

        $ \np{2} $
          &  

566.1  &  0.9  &  0.47  &  1415.4  &  1.9  &  0.94 \\
566.1  &  0.9  &  0.47  &  1415.4  &  1.9  &  0.94  &  500.6  &  0.8  &  0.39  &  1233.1  &  1.6  &  0.82 \\

        $ \np{3} $
          &  

642.5  &  0.8  &  0.27  &  996.7  &  2.7  &  0.89 \\
642.5  &  0.8  &  0.27  &  996.7  &  2.7  &  0.89  &  634.1  &  0.6  &  0.20  &  868.9  &  2.3  &  0.77 \\

        $ \np{4} $
          &  

709.7  &  0.7  &  0.19  &  793.3  &  3.3  &  0.84 \\
709.7  &  0.7  &  0.19  &  793.3  &  3.3  &  0.84  &  726.8  &  0.5  &  0.13  &  704.2  &  2.9  &  0.72 \\

\bottomrule
\end{tabular}
+55 −0
Original line number Diff line number Diff line
+-------------------------------------------------------------------------------+
|                                                                               |
|                                NumDwarf solver                                |
|                                                                               |
+-------------------------------------------------------------------------------+
| MPI processes:                                                              1 |
| Device type:                                               TNL::Devices::Host |
|  OMP enabled:                                                              no |
| Real type:                                                             double |
| Index type:                                                               int |
| Mesh type:TNL::Meshes::Mesh<TNL::Meshes::DefaultConfig<TNL::Meshes::Topologies::Tetrahedron, 3, double, int, short>, TNL::Devices::Host> |
| Sparse matrix:TNL::Matrices::SparseMatrix<double, TNL::Devices::Host, int, TNL::Matrices::GeneralMatrix, TNL::Algorithms::Segments::CSRLight, double, std::allocator<double>, std::allocator<int> > |
| Mass lumping:                                                        disabled |
| Material model:                                                   BrooksCorey |
| Formulation:                                                             PwPn |
+-------------------------------------------------------------------------------+
| Host name:                                                                n16 |
| System:                                                                 Linux |
| Release:                                          3.10.0-1127.13.1.el7.x86_64 |
| Architecture:                                                          x86_64 |
| TNL compiler:                                                GNU G++ (10.2.0) |
| CPU info                                                                      |
|  Model name:                         Intel(R) Xeon(R) Gold 6136 CPU @ 3.00GHz |
|  Cores:                                                                    12 |
|  Threads per core:                                                          2 |
|  Max clock rate (in MHz):                                                3001 |
|  Cache (L1d, L1i, L2, L3):                                32, 32, 1024, 25344 |
+-------------------------------------------------------------------------------+
| Started at:                                         Fri Jul 29 2022, 20:13:07 |
+-------------------------------------------------------------------------------+
+-------------------------------------------------------------------------------+
| Finished at:                                        Fri Jul 29 2022, 20:13:07 |
| Total number of linear solver iterations:                                 351 |
| Total number of time steps:                                                24 |
| Number of time steps in which the preconditioner was updated:               2 |
| Pre-iterate time: avg: 4.930318e-02 stddev: 0.000000e+00 min: 4.930318e-02 max: 4.930318e-02 |
|   nonlinear update time: avg: 1.725292e-02 stddev: 0.000000e+00 min: 1.725292e-02 max: 1.725292e-02 |
|   update_b time: avg: 1.540224e-02 stddev: 0.000000e+00 min: 1.540224e-02 max: 1.540224e-02 |
|   upwind update time: avg: 9.500597e-03 stddev: 0.000000e+00 min: 9.500597e-03 max: 9.500597e-03 |
|   upwind MPI synchronization time: avg: 0.000000e+00 stddev: 0.000000e+00 min: 0.000000e+00 max: 0.000000e+00 |
|   update_R time: avg: 3.426872e-03 stddev: 0.000000e+00 min: 3.426872e-03 max: 3.426872e-03 |
|   update_Q time: avg: 3.423305e-03 stddev: 0.000000e+00 min: 3.423305e-03 max: 3.423305e-03 |
|   model pre-iterate time: avg: 2.461200e-05 stddev: 0.000000e+00 min: 2.461200e-05 max: 2.461200e-05 |
| Linear system assembler time: avg: 2.796267e-02 stddev: 0.000000e+00 min: 2.796267e-02 max: 2.796267e-02 |
| Hypre matrix conversion time: avg: 0.000000e+00 stddev: 0.000000e+00 min: 0.000000e+00 max: 0.000000e+00 |
| Hypre setup time: avg: 1.124451e-02 stddev: 0.000000e+00 min: 1.124451e-02 max: 1.124451e-02 |
| Hypre solve time: avg: 4.931529e-01 stddev: 0.000000e+00 min: 4.931529e-01 max: 4.931529e-01 |
| Post-iterate time: avg: 3.499364e-03 stddev: 0.000000e+00 min: 3.499364e-03 max: 3.499364e-03 |
|   Z_iF -> Z_iK update time: avg: 4.951700e-04 stddev: 0.000000e+00 min: 4.951700e-04 max: 4.951700e-04 |
|   velocities update time: avg: 2.862900e-03 stddev: 0.000000e+00 min: 2.862900e-03 max: 2.862900e-03 |
|   model post-iterate time: avg: 2.453500e-05 stddev: 0.000000e+00 min: 2.453500e-05 max: 2.453500e-05 |
| Compute time:                                                        0.585421 |
| I/O time:                                                           0.0168328 |
| Total time:                                                          0.620314 |
+-------------------------------------------------------------------------------+
+62 −0
Original line number Diff line number Diff line
+-------------------------------------------------------------------------------+
|                                                                               |
|                                NumDwarf solver                                |
|                                                                               |
+-------------------------------------------------------------------------------+
| MPI processes:                                                             12 |
| Device type:                                               TNL::Devices::Host |
|  OMP enabled:                                                              no |
| Real type:                                                             double |
| Index type:                                                               int |
| Mesh type:TNL::Meshes::Mesh<TNL::Meshes::DefaultConfig<TNL::Meshes::Topologies::Tetrahedron, 3, double, int, short>, TNL::Devices::Host> |
| Sparse matrix:TNL::Matrices::SparseMatrix<double, TNL::Devices::Host, int, TNL::Matrices::GeneralMatrix, TNL::Algorithms::Segments::CSRLight, double, std::allocator<double>, std::allocator<int> > |
| Mass lumping:                                                        disabled |
| Material model:                                                   BrooksCorey |
| Formulation:                                                             PwPn |
+-------------------------------------------------------------------------------+
| Host name:                                                                n11 |
| System:                                                                 Linux |
| Release:                                          3.10.0-1127.13.1.el7.x86_64 |
| Architecture:                                                          x86_64 |
| TNL compiler:                                                GNU G++ (10.2.0) |
| CPU info                                                                      |
|  Model name:                         Intel(R) Xeon(R) Gold 6136 CPU @ 3.00GHz |
|  Cores:                                                                    12 |
|  Threads per core:                                                          2 |
|  Max clock rate (in MHz):                                                3001 |
|  Cache (L1d, L1i, L2, L3):                                32, 32, 1024, 25344 |
+-------------------------------------------------------------------------------+
| Started at:                                         Fri Jul 29 2022, 20:12:46 |
+-------------------------------------------------------------------------------+
+-------------------------------------------------------------------------------+
| Finished at:                                        Fri Jul 29 2022, 20:12:46 |
| Total number of linear solver iterations:                                 355 |
| Total number of time steps:                                                24 |
| Number of time steps in which the preconditioner was updated:               2 |
| Pre-iterate time: avg: 1.327222e-02 stddev: 3.870317e-03 min: 7.475905e-03 max: 1.959237e-02 |
|   nonlinear update time: avg: 2.094176e-03 stddev: 1.520214e-04 min: 1.926097e-03 max: 2.450871e-03 |
|   update_b time: avg: 2.100512e-03 stddev: 1.357080e-04 min: 1.829828e-03 max: 2.341923e-03 |
|   upwind update time: avg: 8.462911e-04 stddev: 4.918040e-05 min: 7.448410e-04 max: 9.340620e-04 |
|   upwind MPI synchronization time: avg: 7.158017e-03 stddev: 3.781009e-03 min: 1.206520e-03 max: 1.352675e-02 |
|   update_R time: avg: 4.121011e-04 stddev: 2.052028e-05 min: 3.794420e-04 max: 4.427780e-04 |
|   update_Q time: avg: 4.055774e-04 stddev: 2.112204e-05 min: 3.780560e-04 max: 4.429950e-04 |
|   model pre-iterate time: avg: 2.312175e-05 stddev: 5.434349e-07 min: 2.243800e-05 max: 2.406700e-05 |
| Linear system assembler time: avg: 2.517975e-03 stddev: 1.950120e-04 min: 2.151005e-03 max: 2.802752e-03 |
| Hypre matrix conversion time: avg: 0.000000e+00 stddev: 0.000000e+00 min: 0.000000e+00 max: 0.000000e+00 |
| Hypre setup time: avg: 9.167852e-03 stddev: 1.133747e-04 min: 8.982092e-03 max: 9.403418e-03 |
| Hypre solve time: avg: 1.377379e-01 stddev: 2.698516e-04 min: 1.372116e-01 max: 1.381316e-01 |
|   MPI synchronizations count:                                               0 |
|   MPI synchronization time: avg: 0.000000e+00 stddev: 0.000000e+00 min: 0.000000e+00 max: 0.000000e+00 |
|     async wait before start time: avg: 0.000000e+00 stddev: 0.000000e+00 min: 0.000000e+00 max: 0.000000e+00 |
|     async start time: avg: 0.000000e+00 stddev: 0.000000e+00 min: 0.000000e+00 max: 0.000000e+00 |
|     async wait time: avg: 0.000000e+00 stddev: 0.000000e+00 min: 0.000000e+00 max: 0.000000e+00 |
| Post-iterate time: avg: 5.507078e-04 stddev: 2.262086e-05 min: 5.195300e-04 max: 5.978690e-04 |
|   Z_iF -> Z_iK update time: avg: 7.716208e-05 stddev: 3.189499e-06 min: 7.254800e-05 max: 8.368700e-05 |
|   velocities update time: avg: 3.520552e-04 stddev: 1.938576e-05 min: 3.264330e-04 max: 3.923350e-04 |
|   model post-iterate time: avg: 2.263892e-05 stddev: 3.215008e-07 min: 2.214800e-05 max: 2.341300e-05 |
| MPI operations (included in the previous phases):                             |
|   MPI_Allreduce time: avg: 6.849431e-03 stddev: 4.053002e-03 min: 1.190520e-03 max: 1.576531e-02 |
| Compute time:                                                        0.162041 |
| I/O time:                                                           0.0654004 |
| Total time:                                                          0.268252 |
+-------------------------------------------------------------------------------+
Loading