Loading data/mcwhdd/comptimes_cpu_2D.tex +22 −10 Original line number Diff line number Diff line Loading @@ -8,17 +8,26 @@ % \usepackage{stackengine} % \usepackage[np]{numprint} \begin{tabular}{rN{5}{1} N{2}{1} N{1}{2} N{5}{1} N{2}{1} N{1}{2}} \begin{tabular}{rN{5}{1} N{2}{1} N{1}{2} N{5}{1} N{2}{1} N{1}{2} N{4}{1} N{2}{1} N{1}{2}} \toprule % header row 0 & \multicolumn{6}{c}{TNL} & \multicolumn{3}{c}{Hypre} \\ \cmidrule(lr){2-7} \cmidrule(l){8-10} % header row 1 & \multicolumn{3}{c}{OpenMP} & \multicolumn{3}{c}{MPI} & \multicolumn{3}{c}{MPI} \\ \cmidrule(lr){2-4} \cmidrule(l){5-7} \cmidrule(lr){5-7} \cmidrule(l){8-10} % header row 1 % header row 2 \multicolumn{1}{c}{Cores} & \multicolumn{1}{c}{$ CT $} & \multicolumn{1}{c}{$ Sp $} Loading @@ -26,6 +35,9 @@ & \multicolumn{1}{c}{$ CT $} & \multicolumn{1}{c}{$ Sp $} & \multicolumn{1}{c}{$ E\!f\!f $} & \multicolumn{1}{c}{$ CT $} & \multicolumn{1}{c}{$ Sp $} & \multicolumn{1}{c}{$ E\!f\!f $} \\ \midrule Loading @@ -34,37 +46,37 @@ $ \np{1} $ & 10743.9 & 1.0 & 1.00 & 10800.2 & 1.0 & 1.00 \\ 10743.9 & 1.0 & 1.00 & 10800.2 & 1.0 & 1.00 & 3510.1 & 1.0 & 1.00 \\ $ \np{2} $ & 6349.0 & 1.7 & 0.85 & 5693.5 & 1.9 & 0.95 \\ 6349.0 & 1.7 & 0.85 & 5693.5 & 1.9 & 0.95 & 2058.0 & 1.7 & 0.85 \\ $ \np{4} $ & 3375.9 & 3.2 & 0.80 & 3143.0 & 3.4 & 0.86 \\ 3375.9 & 3.2 & 0.80 & 3143.0 & 3.4 & 0.86 & 1097.1 & 3.2 & 0.80 \\ $ \np{6} $ & 2294.6 & 4.7 & 0.78 & 2506.0 & 4.3 & 0.72 \\ 2294.6 & 4.7 & 0.78 & 2506.0 & 4.3 & 0.72 & 750.5 & 4.7 & 0.78 \\ $ \np{8} $ & 1818.1 & 5.9 & 0.74 & 1787.6 & 6.0 & 0.76 \\ 1818.1 & 5.9 & 0.74 & 1787.6 & 6.0 & 0.76 & 587.6 & 6.0 & 0.75 \\ $ \np{12} $ & 1296.2 & 8.3 & 0.69 & 1096.8 & 9.8 & 0.82 \\ 1296.2 & 8.3 & 0.69 & 1096.8 & 9.8 & 0.82 & 424.8 & 8.3 & 0.69 \\ $ \np{24} $ & 977.0 & 11.0 & 0.46 & 549.3 & 19.7 & 0.82 \\ 977.0 & 11.0 & 0.46 & 549.3 & 19.7 & 0.82 & 215.5 & 16.3 & 0.68 \\ \bottomrule \end{tabular} data/mcwhdd/comptimes_cpu_3D.tex +39 −25 Original line number Diff line number Diff line Loading @@ -8,19 +8,30 @@ % \usepackage{stackengine} % \usepackage[np]{numprint} \begin{tabular}{rrrN{6}{1} N{2}{1} N{1}{2} N{6}{1} N{3}{1} N{1}{2}} \begin{tabular}{rrrN{6}{1} N{2}{1} N{1}{2} N{6}{1} N{3}{1} N{1}{2} N{5}{1} N{3}{1} N{1}{2}} \toprule % header row 0 & & & \multicolumn{6}{c}{TNL} & \multicolumn{3}{c}{Hypre} \\ \cmidrule(lr){4-9} \cmidrule(l){10-12} % header row 1 & & & \multicolumn{3}{c}{OpenMP} & \multicolumn{3}{c}{MPI} & \multicolumn{3}{c}{MPI} \\ \cmidrule(lr){4-6} \cmidrule(l){7-9} \cmidrule(lr){7-9} \cmidrule(l){10-12} % header row 1 % header row 2 \multicolumn{1}{c}{Cores} & \multicolumn{1}{c}{CPUs} & \multicolumn{1}{c}{Nodes} Loading @@ -30,76 +41,79 @@ & \multicolumn{1}{c}{$ CT $} & \multicolumn{1}{c}{$ Sp $} & \multicolumn{1}{c}{$ E\!f\!f $} & \multicolumn{1}{c}{$ CT $} & \multicolumn{1}{c}{$ Sp $} & \multicolumn{1}{c}{$ E\!f\!f $} \\ \midrule $ \np{1} $ & & & 188243.0 & 1.0 & 1.00 & 188706.0 & 1.0 & 1.00 \\ 188243.0 & 1.0 & 1.00 & 188706.0 & 1.0 & 1.00 & 37991.2 & 1.0 & 1.00 \\ $ \np{2} $ & & & 102074.0 & 1.8 & 0.92 & 93659.1 & 2.0 & 1.01 \\ 102074.0 & 1.8 & 0.92 & 93659.1 & 2.0 & 1.01 & 21170.2 & 1.8 & 0.90 \\ $ \np{4} $ & & & 55937.6 & 3.4 & 0.84 & 49553.0 & 3.8 & 0.95 \\ 55937.6 & 3.4 & 0.84 & 49553.0 & 3.8 & 0.95 & 11252.2 & 3.4 & 0.84 \\ $ \np{6} $ & & & 40796.4 & 4.6 & 0.77 & 35594.3 & 5.3 & 0.88 \\ 40796.4 & 4.6 & 0.77 & 35594.3 & 5.3 & 0.88 & 7798.1 & 4.9 & 0.81 \\ $ \np{8} $ & & & 32026.3 & 5.9 & 0.73 & 28958.6 & 6.5 & 0.81 \\ 32026.3 & 5.9 & 0.73 & 28958.6 & 6.5 & 0.81 & 6085.4 & 6.2 & 0.78 \\ $ \np{12} $ & $ \np{1} $ & $ 1/2 $ & 26369.7 & 7.1 & 0.59 & 23839.0 & 7.9 & 0.66 \\ 26369.7 & 7.1 & 0.59 & 23839.0 & 7.9 & 0.66 & 4708.8 & 8.1 & 0.67 \\ $ \np{24} $ & $ \np{2} $ & $ \np{1} $ & 15695.0 & 12.0 & 0.50 & 12184.2 & 15.5 & 0.65 \\ 15695.0 & 12.0 & 0.50 & 12184.2 & 15.5 & 0.65 & 2485.0 & 15.3 & 0.64 \\ $ \np{48} $ & $ \np{4} $ & $ \np{2} $ & & & & 6171.4 & 30.6 & 0.64 \\ & & & 6171.4 & 30.6 & 0.64 & 1249.1 & 30.4 & 0.63 \\ $ \np{72} $ & $ \np{6} $ & $ \np{3} $ & & & & 4026.3 & 46.9 & 0.65 \\ & & & 4026.3 & 46.9 & 0.65 & 880.2 & 43.2 & 0.60 \\ $ \np{96} $ & $ \np{8} $ & $ \np{4} $ & & & & 3016.0 & 62.6 & 0.65 \\ & & & 3016.0 & 62.6 & 0.65 & 592.3 & 64.1 & 0.67 \\ $ \np{120} $ & $ \np{10} $ & $ \np{5} $ & & & & 2374.4 & 79.5 & 0.66 \\ & & & 2374.4 & 79.5 & 0.66 & 471.2 & 80.6 & 0.67 \\ $ \np{144} $ & $ \np{12} $ & $ \np{6} $ & & & & 1968.2 & 95.9 & 0.67 \\ & & & 1968.2 & 95.9 & 0.67 & 415.8 & 91.4 & 0.63 \\ $ \np{168} $ & $ \np{14} $ & $ \np{7} $ & & & & 1643.1 & 114.8 & 0.68 \\ & & & 1643.1 & 114.8 & 0.68 & 372.2 & 102.1 & 0.61 \\ $ \np{192} $ & $ \np{16} $ & $ \np{8} $ & & & & 1410.4 & 133.8 & 0.70 \\ & & & 1410.4 & 133.8 & 0.70 & 310.7 & 122.3 & 0.64 \\ $ \np{216} $ & $ \np{18} $ & $ \np{9} $ & & & & 1242.5 & 151.9 & 0.70 \\ & & & 1242.5 & 151.9 & 0.70 & 277.5 & 136.9 & 0.63 \\ $ \np{240} $ & $ \np{20} $ & $ \np{10} $ & & & & 1114.3 & 169.4 & 0.71 \\ & & & 1114.3 & 169.4 & 0.71 & 240.3 & 158.1 & 0.66 \\ $ \np{264} $ & $ \np{22} $ & $ \np{11} $ & & & & 1003.8 & 188.0 & 0.71 \\ & & & 1003.8 & 188.0 & 0.71 & 251.5 & 151.0 & 0.57 \\ $ \np{288} $ & $ \np{24} $ & $ \np{12} $ & & & & 924.2 & 204.2 & 0.71 \\ & & & 924.2 & 204.2 & 0.71 & 223.9 & 169.7 & 0.59 \\ $ \np{312} $ & $ \np{26} $ & $ \np{13} $ & & & & 860.5 & 219.3 & 0.70 \\ & & & 860.5 & 219.3 & 0.70 & 202.9 & 187.2 & 0.60 \\ $ \np{336} $ & $ \np{28} $ & $ \np{14} $ & & & & 807.3 & 233.8 & 0.70 \\ & & & 807.3 & 233.8 & 0.70 & 201.9 & 188.2 & 0.56 \\ $ \np{360} $ & $ \np{30} $ & $ \np{15} $ & & & & 761.6 & 247.8 & 0.69 \\ & & & 761.6 & 247.8 & 0.69 & & & \\ $ \np{384} $ & $ \np{32} $ & $ \np{16} $ & & & & 702.4 & 268.7 & 0.70 \\ & & & 702.4 & 268.7 & 0.70 & & & \\ \bottomrule \end{tabular} data/mcwhdd/comptimes_gpu.tex +24 −7 Original line number Diff line number Diff line Loading @@ -8,18 +8,35 @@ % \usepackage{stackengine} % \usepackage[np]{numprint} \begin{tabular}{rN{3}{1} N{1}{1} N{1}{2} N{4}{1} N{1}{1} N{1}{2}} \begin{tabular}{rN{3}{1} N{1}{1} N{1}{2} N{4}{1} N{1}{1} N{1}{2} N{3}{1} N{1}{1} N{1}{2} N{4}{1} N{1}{1} N{1}{2}} \toprule % header row 0 & \multicolumn{6}{c}{TNL} & \multicolumn{6}{c}{Hypre} \\ \cmidrule(lr){2-7} \cmidrule(l){8-13} % header row 1 & \multicolumn{3}{c}{2D$^\triangle_5$} & \multicolumn{3}{c}{3D$^\triangle_5$} & \multicolumn{3}{c}{2D$^\triangle_5$} & \multicolumn{3}{c}{3D$^\triangle_5$} \\ \cmidrule(lr){2-4} \cmidrule(l){5-7} \cmidrule(lr){5-7} \cmidrule(lr){8-10} \cmidrule(l){11-13} % header row 1 % header row 2 \multicolumn{1}{c}{GPUs} & \multicolumn{1}{c}{$ CT $} & \multicolumn{1}{c}{$ Sp $} & \multicolumn{1}{c}{$ E\!f\!f $} & \multicolumn{1}{c}{$ CT $} & \multicolumn{1}{c}{$ Sp $} & \multicolumn{1}{c}{$ E\!f\!f $} & \multicolumn{1}{c}{$ CT $} & \multicolumn{1}{c}{$ Sp $} & \multicolumn{1}{c}{$ E\!f\!f $} Loading @@ -34,22 +51,22 @@ $ \np{1} $ & 528.6 & 1.0 & 1.00 & 2654.8 & 1.0 & 1.00 \\ 528.6 & 1.0 & 1.00 & 2654.8 & 1.0 & 1.00 & 389.8 & 1.0 & 1.00 & 2014.5 & 1.0 & 1.00 \\ $ \np{2} $ & 566.1 & 0.9 & 0.47 & 1415.4 & 1.9 & 0.94 \\ 566.1 & 0.9 & 0.47 & 1415.4 & 1.9 & 0.94 & 500.6 & 0.8 & 0.39 & 1233.1 & 1.6 & 0.82 \\ $ \np{3} $ & 642.5 & 0.8 & 0.27 & 996.7 & 2.7 & 0.89 \\ 642.5 & 0.8 & 0.27 & 996.7 & 2.7 & 0.89 & 634.1 & 0.6 & 0.20 & 868.9 & 2.3 & 0.77 \\ $ \np{4} $ & 709.7 & 0.7 & 0.19 & 793.3 & 3.3 & 0.84 \\ 709.7 & 0.7 & 0.19 & 793.3 & 3.3 & 0.84 & 726.8 & 0.5 & 0.13 & 704.2 & 2.9 & 0.72 \\ \bottomrule \end{tabular} data/mcwhdd/hypre_cpu/cube1m_1-BC-noML_mpi_np1/host-bicgstab+diagonal/log 0 → 100644 +55 −0 Original line number Diff line number Diff line +-------------------------------------------------------------------------------+ | | | NumDwarf solver | | | +-------------------------------------------------------------------------------+ | MPI processes: 1 | | Device type: TNL::Devices::Host | | OMP enabled: no | | Real type: double | | Index type: int | | Mesh type:TNL::Meshes::Mesh<TNL::Meshes::DefaultConfig<TNL::Meshes::Topologies::Tetrahedron, 3, double, int, short>, TNL::Devices::Host> | | Sparse matrix:TNL::Matrices::SparseMatrix<double, TNL::Devices::Host, int, TNL::Matrices::GeneralMatrix, TNL::Algorithms::Segments::CSRLight, double, std::allocator<double>, std::allocator<int> > | | Mass lumping: disabled | | Material model: BrooksCorey | | Formulation: PwPn | +-------------------------------------------------------------------------------+ | Host name: n16 | | System: Linux | | Release: 3.10.0-1127.13.1.el7.x86_64 | | Architecture: x86_64 | | TNL compiler: GNU G++ (10.2.0) | | CPU info | | Model name: Intel(R) Xeon(R) Gold 6136 CPU @ 3.00GHz | | Cores: 12 | | Threads per core: 2 | | Max clock rate (in MHz): 3001 | | Cache (L1d, L1i, L2, L3): 32, 32, 1024, 25344 | +-------------------------------------------------------------------------------+ | Started at: Fri Jul 29 2022, 20:13:07 | +-------------------------------------------------------------------------------+ +-------------------------------------------------------------------------------+ | Finished at: Fri Jul 29 2022, 20:13:07 | | Total number of linear solver iterations: 351 | | Total number of time steps: 24 | | Number of time steps in which the preconditioner was updated: 2 | | Pre-iterate time: avg: 4.930318e-02 stddev: 0.000000e+00 min: 4.930318e-02 max: 4.930318e-02 | | nonlinear update time: avg: 1.725292e-02 stddev: 0.000000e+00 min: 1.725292e-02 max: 1.725292e-02 | | update_b time: avg: 1.540224e-02 stddev: 0.000000e+00 min: 1.540224e-02 max: 1.540224e-02 | | upwind update time: avg: 9.500597e-03 stddev: 0.000000e+00 min: 9.500597e-03 max: 9.500597e-03 | | upwind MPI synchronization time: avg: 0.000000e+00 stddev: 0.000000e+00 min: 0.000000e+00 max: 0.000000e+00 | | update_R time: avg: 3.426872e-03 stddev: 0.000000e+00 min: 3.426872e-03 max: 3.426872e-03 | | update_Q time: avg: 3.423305e-03 stddev: 0.000000e+00 min: 3.423305e-03 max: 3.423305e-03 | | model pre-iterate time: avg: 2.461200e-05 stddev: 0.000000e+00 min: 2.461200e-05 max: 2.461200e-05 | | Linear system assembler time: avg: 2.796267e-02 stddev: 0.000000e+00 min: 2.796267e-02 max: 2.796267e-02 | | Hypre matrix conversion time: avg: 0.000000e+00 stddev: 0.000000e+00 min: 0.000000e+00 max: 0.000000e+00 | | Hypre setup time: avg: 1.124451e-02 stddev: 0.000000e+00 min: 1.124451e-02 max: 1.124451e-02 | | Hypre solve time: avg: 4.931529e-01 stddev: 0.000000e+00 min: 4.931529e-01 max: 4.931529e-01 | | Post-iterate time: avg: 3.499364e-03 stddev: 0.000000e+00 min: 3.499364e-03 max: 3.499364e-03 | | Z_iF -> Z_iK update time: avg: 4.951700e-04 stddev: 0.000000e+00 min: 4.951700e-04 max: 4.951700e-04 | | velocities update time: avg: 2.862900e-03 stddev: 0.000000e+00 min: 2.862900e-03 max: 2.862900e-03 | | model post-iterate time: avg: 2.453500e-05 stddev: 0.000000e+00 min: 2.453500e-05 max: 2.453500e-05 | | Compute time: 0.585421 | | I/O time: 0.0168328 | | Total time: 0.620314 | +-------------------------------------------------------------------------------+ data/mcwhdd/hypre_cpu/cube1m_1-BC-noML_mpi_np12/host-bicgstab+diagonal/log 0 → 100644 +62 −0 Original line number Diff line number Diff line +-------------------------------------------------------------------------------+ | | | NumDwarf solver | | | +-------------------------------------------------------------------------------+ | MPI processes: 12 | | Device type: TNL::Devices::Host | | OMP enabled: no | | Real type: double | | Index type: int | | Mesh type:TNL::Meshes::Mesh<TNL::Meshes::DefaultConfig<TNL::Meshes::Topologies::Tetrahedron, 3, double, int, short>, TNL::Devices::Host> | | Sparse matrix:TNL::Matrices::SparseMatrix<double, TNL::Devices::Host, int, TNL::Matrices::GeneralMatrix, TNL::Algorithms::Segments::CSRLight, double, std::allocator<double>, std::allocator<int> > | | Mass lumping: disabled | | Material model: BrooksCorey | | Formulation: PwPn | +-------------------------------------------------------------------------------+ | Host name: n11 | | System: Linux | | Release: 3.10.0-1127.13.1.el7.x86_64 | | Architecture: x86_64 | | TNL compiler: GNU G++ (10.2.0) | | CPU info | | Model name: Intel(R) Xeon(R) Gold 6136 CPU @ 3.00GHz | | Cores: 12 | | Threads per core: 2 | | Max clock rate (in MHz): 3001 | | Cache (L1d, L1i, L2, L3): 32, 32, 1024, 25344 | +-------------------------------------------------------------------------------+ | Started at: Fri Jul 29 2022, 20:12:46 | +-------------------------------------------------------------------------------+ +-------------------------------------------------------------------------------+ | Finished at: Fri Jul 29 2022, 20:12:46 | | Total number of linear solver iterations: 355 | | Total number of time steps: 24 | | Number of time steps in which the preconditioner was updated: 2 | | Pre-iterate time: avg: 1.327222e-02 stddev: 3.870317e-03 min: 7.475905e-03 max: 1.959237e-02 | | nonlinear update time: avg: 2.094176e-03 stddev: 1.520214e-04 min: 1.926097e-03 max: 2.450871e-03 | | update_b time: avg: 2.100512e-03 stddev: 1.357080e-04 min: 1.829828e-03 max: 2.341923e-03 | | upwind update time: avg: 8.462911e-04 stddev: 4.918040e-05 min: 7.448410e-04 max: 9.340620e-04 | | upwind MPI synchronization time: avg: 7.158017e-03 stddev: 3.781009e-03 min: 1.206520e-03 max: 1.352675e-02 | | update_R time: avg: 4.121011e-04 stddev: 2.052028e-05 min: 3.794420e-04 max: 4.427780e-04 | | update_Q time: avg: 4.055774e-04 stddev: 2.112204e-05 min: 3.780560e-04 max: 4.429950e-04 | | model pre-iterate time: avg: 2.312175e-05 stddev: 5.434349e-07 min: 2.243800e-05 max: 2.406700e-05 | | Linear system assembler time: avg: 2.517975e-03 stddev: 1.950120e-04 min: 2.151005e-03 max: 2.802752e-03 | | Hypre matrix conversion time: avg: 0.000000e+00 stddev: 0.000000e+00 min: 0.000000e+00 max: 0.000000e+00 | | Hypre setup time: avg: 9.167852e-03 stddev: 1.133747e-04 min: 8.982092e-03 max: 9.403418e-03 | | Hypre solve time: avg: 1.377379e-01 stddev: 2.698516e-04 min: 1.372116e-01 max: 1.381316e-01 | | MPI synchronizations count: 0 | | MPI synchronization time: avg: 0.000000e+00 stddev: 0.000000e+00 min: 0.000000e+00 max: 0.000000e+00 | | async wait before start time: avg: 0.000000e+00 stddev: 0.000000e+00 min: 0.000000e+00 max: 0.000000e+00 | | async start time: avg: 0.000000e+00 stddev: 0.000000e+00 min: 0.000000e+00 max: 0.000000e+00 | | async wait time: avg: 0.000000e+00 stddev: 0.000000e+00 min: 0.000000e+00 max: 0.000000e+00 | | Post-iterate time: avg: 5.507078e-04 stddev: 2.262086e-05 min: 5.195300e-04 max: 5.978690e-04 | | Z_iF -> Z_iK update time: avg: 7.716208e-05 stddev: 3.189499e-06 min: 7.254800e-05 max: 8.368700e-05 | | velocities update time: avg: 3.520552e-04 stddev: 1.938576e-05 min: 3.264330e-04 max: 3.923350e-04 | | model post-iterate time: avg: 2.263892e-05 stddev: 3.215008e-07 min: 2.214800e-05 max: 2.341300e-05 | | MPI operations (included in the previous phases): | | MPI_Allreduce time: avg: 6.849431e-03 stddev: 4.053002e-03 min: 1.190520e-03 max: 1.576531e-02 | | Compute time: 0.162041 | | I/O time: 0.0654004 | | Total time: 0.268252 | +-------------------------------------------------------------------------------+ Loading
data/mcwhdd/comptimes_cpu_2D.tex +22 −10 Original line number Diff line number Diff line Loading @@ -8,17 +8,26 @@ % \usepackage{stackengine} % \usepackage[np]{numprint} \begin{tabular}{rN{5}{1} N{2}{1} N{1}{2} N{5}{1} N{2}{1} N{1}{2}} \begin{tabular}{rN{5}{1} N{2}{1} N{1}{2} N{5}{1} N{2}{1} N{1}{2} N{4}{1} N{2}{1} N{1}{2}} \toprule % header row 0 & \multicolumn{6}{c}{TNL} & \multicolumn{3}{c}{Hypre} \\ \cmidrule(lr){2-7} \cmidrule(l){8-10} % header row 1 & \multicolumn{3}{c}{OpenMP} & \multicolumn{3}{c}{MPI} & \multicolumn{3}{c}{MPI} \\ \cmidrule(lr){2-4} \cmidrule(l){5-7} \cmidrule(lr){5-7} \cmidrule(l){8-10} % header row 1 % header row 2 \multicolumn{1}{c}{Cores} & \multicolumn{1}{c}{$ CT $} & \multicolumn{1}{c}{$ Sp $} Loading @@ -26,6 +35,9 @@ & \multicolumn{1}{c}{$ CT $} & \multicolumn{1}{c}{$ Sp $} & \multicolumn{1}{c}{$ E\!f\!f $} & \multicolumn{1}{c}{$ CT $} & \multicolumn{1}{c}{$ Sp $} & \multicolumn{1}{c}{$ E\!f\!f $} \\ \midrule Loading @@ -34,37 +46,37 @@ $ \np{1} $ & 10743.9 & 1.0 & 1.00 & 10800.2 & 1.0 & 1.00 \\ 10743.9 & 1.0 & 1.00 & 10800.2 & 1.0 & 1.00 & 3510.1 & 1.0 & 1.00 \\ $ \np{2} $ & 6349.0 & 1.7 & 0.85 & 5693.5 & 1.9 & 0.95 \\ 6349.0 & 1.7 & 0.85 & 5693.5 & 1.9 & 0.95 & 2058.0 & 1.7 & 0.85 \\ $ \np{4} $ & 3375.9 & 3.2 & 0.80 & 3143.0 & 3.4 & 0.86 \\ 3375.9 & 3.2 & 0.80 & 3143.0 & 3.4 & 0.86 & 1097.1 & 3.2 & 0.80 \\ $ \np{6} $ & 2294.6 & 4.7 & 0.78 & 2506.0 & 4.3 & 0.72 \\ 2294.6 & 4.7 & 0.78 & 2506.0 & 4.3 & 0.72 & 750.5 & 4.7 & 0.78 \\ $ \np{8} $ & 1818.1 & 5.9 & 0.74 & 1787.6 & 6.0 & 0.76 \\ 1818.1 & 5.9 & 0.74 & 1787.6 & 6.0 & 0.76 & 587.6 & 6.0 & 0.75 \\ $ \np{12} $ & 1296.2 & 8.3 & 0.69 & 1096.8 & 9.8 & 0.82 \\ 1296.2 & 8.3 & 0.69 & 1096.8 & 9.8 & 0.82 & 424.8 & 8.3 & 0.69 \\ $ \np{24} $ & 977.0 & 11.0 & 0.46 & 549.3 & 19.7 & 0.82 \\ 977.0 & 11.0 & 0.46 & 549.3 & 19.7 & 0.82 & 215.5 & 16.3 & 0.68 \\ \bottomrule \end{tabular}
data/mcwhdd/comptimes_cpu_3D.tex +39 −25 Original line number Diff line number Diff line Loading @@ -8,19 +8,30 @@ % \usepackage{stackengine} % \usepackage[np]{numprint} \begin{tabular}{rrrN{6}{1} N{2}{1} N{1}{2} N{6}{1} N{3}{1} N{1}{2}} \begin{tabular}{rrrN{6}{1} N{2}{1} N{1}{2} N{6}{1} N{3}{1} N{1}{2} N{5}{1} N{3}{1} N{1}{2}} \toprule % header row 0 & & & \multicolumn{6}{c}{TNL} & \multicolumn{3}{c}{Hypre} \\ \cmidrule(lr){4-9} \cmidrule(l){10-12} % header row 1 & & & \multicolumn{3}{c}{OpenMP} & \multicolumn{3}{c}{MPI} & \multicolumn{3}{c}{MPI} \\ \cmidrule(lr){4-6} \cmidrule(l){7-9} \cmidrule(lr){7-9} \cmidrule(l){10-12} % header row 1 % header row 2 \multicolumn{1}{c}{Cores} & \multicolumn{1}{c}{CPUs} & \multicolumn{1}{c}{Nodes} Loading @@ -30,76 +41,79 @@ & \multicolumn{1}{c}{$ CT $} & \multicolumn{1}{c}{$ Sp $} & \multicolumn{1}{c}{$ E\!f\!f $} & \multicolumn{1}{c}{$ CT $} & \multicolumn{1}{c}{$ Sp $} & \multicolumn{1}{c}{$ E\!f\!f $} \\ \midrule $ \np{1} $ & & & 188243.0 & 1.0 & 1.00 & 188706.0 & 1.0 & 1.00 \\ 188243.0 & 1.0 & 1.00 & 188706.0 & 1.0 & 1.00 & 37991.2 & 1.0 & 1.00 \\ $ \np{2} $ & & & 102074.0 & 1.8 & 0.92 & 93659.1 & 2.0 & 1.01 \\ 102074.0 & 1.8 & 0.92 & 93659.1 & 2.0 & 1.01 & 21170.2 & 1.8 & 0.90 \\ $ \np{4} $ & & & 55937.6 & 3.4 & 0.84 & 49553.0 & 3.8 & 0.95 \\ 55937.6 & 3.4 & 0.84 & 49553.0 & 3.8 & 0.95 & 11252.2 & 3.4 & 0.84 \\ $ \np{6} $ & & & 40796.4 & 4.6 & 0.77 & 35594.3 & 5.3 & 0.88 \\ 40796.4 & 4.6 & 0.77 & 35594.3 & 5.3 & 0.88 & 7798.1 & 4.9 & 0.81 \\ $ \np{8} $ & & & 32026.3 & 5.9 & 0.73 & 28958.6 & 6.5 & 0.81 \\ 32026.3 & 5.9 & 0.73 & 28958.6 & 6.5 & 0.81 & 6085.4 & 6.2 & 0.78 \\ $ \np{12} $ & $ \np{1} $ & $ 1/2 $ & 26369.7 & 7.1 & 0.59 & 23839.0 & 7.9 & 0.66 \\ 26369.7 & 7.1 & 0.59 & 23839.0 & 7.9 & 0.66 & 4708.8 & 8.1 & 0.67 \\ $ \np{24} $ & $ \np{2} $ & $ \np{1} $ & 15695.0 & 12.0 & 0.50 & 12184.2 & 15.5 & 0.65 \\ 15695.0 & 12.0 & 0.50 & 12184.2 & 15.5 & 0.65 & 2485.0 & 15.3 & 0.64 \\ $ \np{48} $ & $ \np{4} $ & $ \np{2} $ & & & & 6171.4 & 30.6 & 0.64 \\ & & & 6171.4 & 30.6 & 0.64 & 1249.1 & 30.4 & 0.63 \\ $ \np{72} $ & $ \np{6} $ & $ \np{3} $ & & & & 4026.3 & 46.9 & 0.65 \\ & & & 4026.3 & 46.9 & 0.65 & 880.2 & 43.2 & 0.60 \\ $ \np{96} $ & $ \np{8} $ & $ \np{4} $ & & & & 3016.0 & 62.6 & 0.65 \\ & & & 3016.0 & 62.6 & 0.65 & 592.3 & 64.1 & 0.67 \\ $ \np{120} $ & $ \np{10} $ & $ \np{5} $ & & & & 2374.4 & 79.5 & 0.66 \\ & & & 2374.4 & 79.5 & 0.66 & 471.2 & 80.6 & 0.67 \\ $ \np{144} $ & $ \np{12} $ & $ \np{6} $ & & & & 1968.2 & 95.9 & 0.67 \\ & & & 1968.2 & 95.9 & 0.67 & 415.8 & 91.4 & 0.63 \\ $ \np{168} $ & $ \np{14} $ & $ \np{7} $ & & & & 1643.1 & 114.8 & 0.68 \\ & & & 1643.1 & 114.8 & 0.68 & 372.2 & 102.1 & 0.61 \\ $ \np{192} $ & $ \np{16} $ & $ \np{8} $ & & & & 1410.4 & 133.8 & 0.70 \\ & & & 1410.4 & 133.8 & 0.70 & 310.7 & 122.3 & 0.64 \\ $ \np{216} $ & $ \np{18} $ & $ \np{9} $ & & & & 1242.5 & 151.9 & 0.70 \\ & & & 1242.5 & 151.9 & 0.70 & 277.5 & 136.9 & 0.63 \\ $ \np{240} $ & $ \np{20} $ & $ \np{10} $ & & & & 1114.3 & 169.4 & 0.71 \\ & & & 1114.3 & 169.4 & 0.71 & 240.3 & 158.1 & 0.66 \\ $ \np{264} $ & $ \np{22} $ & $ \np{11} $ & & & & 1003.8 & 188.0 & 0.71 \\ & & & 1003.8 & 188.0 & 0.71 & 251.5 & 151.0 & 0.57 \\ $ \np{288} $ & $ \np{24} $ & $ \np{12} $ & & & & 924.2 & 204.2 & 0.71 \\ & & & 924.2 & 204.2 & 0.71 & 223.9 & 169.7 & 0.59 \\ $ \np{312} $ & $ \np{26} $ & $ \np{13} $ & & & & 860.5 & 219.3 & 0.70 \\ & & & 860.5 & 219.3 & 0.70 & 202.9 & 187.2 & 0.60 \\ $ \np{336} $ & $ \np{28} $ & $ \np{14} $ & & & & 807.3 & 233.8 & 0.70 \\ & & & 807.3 & 233.8 & 0.70 & 201.9 & 188.2 & 0.56 \\ $ \np{360} $ & $ \np{30} $ & $ \np{15} $ & & & & 761.6 & 247.8 & 0.69 \\ & & & 761.6 & 247.8 & 0.69 & & & \\ $ \np{384} $ & $ \np{32} $ & $ \np{16} $ & & & & 702.4 & 268.7 & 0.70 \\ & & & 702.4 & 268.7 & 0.70 & & & \\ \bottomrule \end{tabular}
data/mcwhdd/comptimes_gpu.tex +24 −7 Original line number Diff line number Diff line Loading @@ -8,18 +8,35 @@ % \usepackage{stackengine} % \usepackage[np]{numprint} \begin{tabular}{rN{3}{1} N{1}{1} N{1}{2} N{4}{1} N{1}{1} N{1}{2}} \begin{tabular}{rN{3}{1} N{1}{1} N{1}{2} N{4}{1} N{1}{1} N{1}{2} N{3}{1} N{1}{1} N{1}{2} N{4}{1} N{1}{1} N{1}{2}} \toprule % header row 0 & \multicolumn{6}{c}{TNL} & \multicolumn{6}{c}{Hypre} \\ \cmidrule(lr){2-7} \cmidrule(l){8-13} % header row 1 & \multicolumn{3}{c}{2D$^\triangle_5$} & \multicolumn{3}{c}{3D$^\triangle_5$} & \multicolumn{3}{c}{2D$^\triangle_5$} & \multicolumn{3}{c}{3D$^\triangle_5$} \\ \cmidrule(lr){2-4} \cmidrule(l){5-7} \cmidrule(lr){5-7} \cmidrule(lr){8-10} \cmidrule(l){11-13} % header row 1 % header row 2 \multicolumn{1}{c}{GPUs} & \multicolumn{1}{c}{$ CT $} & \multicolumn{1}{c}{$ Sp $} & \multicolumn{1}{c}{$ E\!f\!f $} & \multicolumn{1}{c}{$ CT $} & \multicolumn{1}{c}{$ Sp $} & \multicolumn{1}{c}{$ E\!f\!f $} & \multicolumn{1}{c}{$ CT $} & \multicolumn{1}{c}{$ Sp $} & \multicolumn{1}{c}{$ E\!f\!f $} Loading @@ -34,22 +51,22 @@ $ \np{1} $ & 528.6 & 1.0 & 1.00 & 2654.8 & 1.0 & 1.00 \\ 528.6 & 1.0 & 1.00 & 2654.8 & 1.0 & 1.00 & 389.8 & 1.0 & 1.00 & 2014.5 & 1.0 & 1.00 \\ $ \np{2} $ & 566.1 & 0.9 & 0.47 & 1415.4 & 1.9 & 0.94 \\ 566.1 & 0.9 & 0.47 & 1415.4 & 1.9 & 0.94 & 500.6 & 0.8 & 0.39 & 1233.1 & 1.6 & 0.82 \\ $ \np{3} $ & 642.5 & 0.8 & 0.27 & 996.7 & 2.7 & 0.89 \\ 642.5 & 0.8 & 0.27 & 996.7 & 2.7 & 0.89 & 634.1 & 0.6 & 0.20 & 868.9 & 2.3 & 0.77 \\ $ \np{4} $ & 709.7 & 0.7 & 0.19 & 793.3 & 3.3 & 0.84 \\ 709.7 & 0.7 & 0.19 & 793.3 & 3.3 & 0.84 & 726.8 & 0.5 & 0.13 & 704.2 & 2.9 & 0.72 \\ \bottomrule \end{tabular}
data/mcwhdd/hypre_cpu/cube1m_1-BC-noML_mpi_np1/host-bicgstab+diagonal/log 0 → 100644 +55 −0 Original line number Diff line number Diff line +-------------------------------------------------------------------------------+ | | | NumDwarf solver | | | +-------------------------------------------------------------------------------+ | MPI processes: 1 | | Device type: TNL::Devices::Host | | OMP enabled: no | | Real type: double | | Index type: int | | Mesh type:TNL::Meshes::Mesh<TNL::Meshes::DefaultConfig<TNL::Meshes::Topologies::Tetrahedron, 3, double, int, short>, TNL::Devices::Host> | | Sparse matrix:TNL::Matrices::SparseMatrix<double, TNL::Devices::Host, int, TNL::Matrices::GeneralMatrix, TNL::Algorithms::Segments::CSRLight, double, std::allocator<double>, std::allocator<int> > | | Mass lumping: disabled | | Material model: BrooksCorey | | Formulation: PwPn | +-------------------------------------------------------------------------------+ | Host name: n16 | | System: Linux | | Release: 3.10.0-1127.13.1.el7.x86_64 | | Architecture: x86_64 | | TNL compiler: GNU G++ (10.2.0) | | CPU info | | Model name: Intel(R) Xeon(R) Gold 6136 CPU @ 3.00GHz | | Cores: 12 | | Threads per core: 2 | | Max clock rate (in MHz): 3001 | | Cache (L1d, L1i, L2, L3): 32, 32, 1024, 25344 | +-------------------------------------------------------------------------------+ | Started at: Fri Jul 29 2022, 20:13:07 | +-------------------------------------------------------------------------------+ +-------------------------------------------------------------------------------+ | Finished at: Fri Jul 29 2022, 20:13:07 | | Total number of linear solver iterations: 351 | | Total number of time steps: 24 | | Number of time steps in which the preconditioner was updated: 2 | | Pre-iterate time: avg: 4.930318e-02 stddev: 0.000000e+00 min: 4.930318e-02 max: 4.930318e-02 | | nonlinear update time: avg: 1.725292e-02 stddev: 0.000000e+00 min: 1.725292e-02 max: 1.725292e-02 | | update_b time: avg: 1.540224e-02 stddev: 0.000000e+00 min: 1.540224e-02 max: 1.540224e-02 | | upwind update time: avg: 9.500597e-03 stddev: 0.000000e+00 min: 9.500597e-03 max: 9.500597e-03 | | upwind MPI synchronization time: avg: 0.000000e+00 stddev: 0.000000e+00 min: 0.000000e+00 max: 0.000000e+00 | | update_R time: avg: 3.426872e-03 stddev: 0.000000e+00 min: 3.426872e-03 max: 3.426872e-03 | | update_Q time: avg: 3.423305e-03 stddev: 0.000000e+00 min: 3.423305e-03 max: 3.423305e-03 | | model pre-iterate time: avg: 2.461200e-05 stddev: 0.000000e+00 min: 2.461200e-05 max: 2.461200e-05 | | Linear system assembler time: avg: 2.796267e-02 stddev: 0.000000e+00 min: 2.796267e-02 max: 2.796267e-02 | | Hypre matrix conversion time: avg: 0.000000e+00 stddev: 0.000000e+00 min: 0.000000e+00 max: 0.000000e+00 | | Hypre setup time: avg: 1.124451e-02 stddev: 0.000000e+00 min: 1.124451e-02 max: 1.124451e-02 | | Hypre solve time: avg: 4.931529e-01 stddev: 0.000000e+00 min: 4.931529e-01 max: 4.931529e-01 | | Post-iterate time: avg: 3.499364e-03 stddev: 0.000000e+00 min: 3.499364e-03 max: 3.499364e-03 | | Z_iF -> Z_iK update time: avg: 4.951700e-04 stddev: 0.000000e+00 min: 4.951700e-04 max: 4.951700e-04 | | velocities update time: avg: 2.862900e-03 stddev: 0.000000e+00 min: 2.862900e-03 max: 2.862900e-03 | | model post-iterate time: avg: 2.453500e-05 stddev: 0.000000e+00 min: 2.453500e-05 max: 2.453500e-05 | | Compute time: 0.585421 | | I/O time: 0.0168328 | | Total time: 0.620314 | +-------------------------------------------------------------------------------+
data/mcwhdd/hypre_cpu/cube1m_1-BC-noML_mpi_np12/host-bicgstab+diagonal/log 0 → 100644 +62 −0 Original line number Diff line number Diff line +-------------------------------------------------------------------------------+ | | | NumDwarf solver | | | +-------------------------------------------------------------------------------+ | MPI processes: 12 | | Device type: TNL::Devices::Host | | OMP enabled: no | | Real type: double | | Index type: int | | Mesh type:TNL::Meshes::Mesh<TNL::Meshes::DefaultConfig<TNL::Meshes::Topologies::Tetrahedron, 3, double, int, short>, TNL::Devices::Host> | | Sparse matrix:TNL::Matrices::SparseMatrix<double, TNL::Devices::Host, int, TNL::Matrices::GeneralMatrix, TNL::Algorithms::Segments::CSRLight, double, std::allocator<double>, std::allocator<int> > | | Mass lumping: disabled | | Material model: BrooksCorey | | Formulation: PwPn | +-------------------------------------------------------------------------------+ | Host name: n11 | | System: Linux | | Release: 3.10.0-1127.13.1.el7.x86_64 | | Architecture: x86_64 | | TNL compiler: GNU G++ (10.2.0) | | CPU info | | Model name: Intel(R) Xeon(R) Gold 6136 CPU @ 3.00GHz | | Cores: 12 | | Threads per core: 2 | | Max clock rate (in MHz): 3001 | | Cache (L1d, L1i, L2, L3): 32, 32, 1024, 25344 | +-------------------------------------------------------------------------------+ | Started at: Fri Jul 29 2022, 20:12:46 | +-------------------------------------------------------------------------------+ +-------------------------------------------------------------------------------+ | Finished at: Fri Jul 29 2022, 20:12:46 | | Total number of linear solver iterations: 355 | | Total number of time steps: 24 | | Number of time steps in which the preconditioner was updated: 2 | | Pre-iterate time: avg: 1.327222e-02 stddev: 3.870317e-03 min: 7.475905e-03 max: 1.959237e-02 | | nonlinear update time: avg: 2.094176e-03 stddev: 1.520214e-04 min: 1.926097e-03 max: 2.450871e-03 | | update_b time: avg: 2.100512e-03 stddev: 1.357080e-04 min: 1.829828e-03 max: 2.341923e-03 | | upwind update time: avg: 8.462911e-04 stddev: 4.918040e-05 min: 7.448410e-04 max: 9.340620e-04 | | upwind MPI synchronization time: avg: 7.158017e-03 stddev: 3.781009e-03 min: 1.206520e-03 max: 1.352675e-02 | | update_R time: avg: 4.121011e-04 stddev: 2.052028e-05 min: 3.794420e-04 max: 4.427780e-04 | | update_Q time: avg: 4.055774e-04 stddev: 2.112204e-05 min: 3.780560e-04 max: 4.429950e-04 | | model pre-iterate time: avg: 2.312175e-05 stddev: 5.434349e-07 min: 2.243800e-05 max: 2.406700e-05 | | Linear system assembler time: avg: 2.517975e-03 stddev: 1.950120e-04 min: 2.151005e-03 max: 2.802752e-03 | | Hypre matrix conversion time: avg: 0.000000e+00 stddev: 0.000000e+00 min: 0.000000e+00 max: 0.000000e+00 | | Hypre setup time: avg: 9.167852e-03 stddev: 1.133747e-04 min: 8.982092e-03 max: 9.403418e-03 | | Hypre solve time: avg: 1.377379e-01 stddev: 2.698516e-04 min: 1.372116e-01 max: 1.381316e-01 | | MPI synchronizations count: 0 | | MPI synchronization time: avg: 0.000000e+00 stddev: 0.000000e+00 min: 0.000000e+00 max: 0.000000e+00 | | async wait before start time: avg: 0.000000e+00 stddev: 0.000000e+00 min: 0.000000e+00 max: 0.000000e+00 | | async start time: avg: 0.000000e+00 stddev: 0.000000e+00 min: 0.000000e+00 max: 0.000000e+00 | | async wait time: avg: 0.000000e+00 stddev: 0.000000e+00 min: 0.000000e+00 max: 0.000000e+00 | | Post-iterate time: avg: 5.507078e-04 stddev: 2.262086e-05 min: 5.195300e-04 max: 5.978690e-04 | | Z_iF -> Z_iK update time: avg: 7.716208e-05 stddev: 3.189499e-06 min: 7.254800e-05 max: 8.368700e-05 | | velocities update time: avg: 3.520552e-04 stddev: 1.938576e-05 min: 3.264330e-04 max: 3.923350e-04 | | model post-iterate time: avg: 2.263892e-05 stddev: 3.215008e-07 min: 2.214800e-05 max: 2.341300e-05 | | MPI operations (included in the previous phases): | | MPI_Allreduce time: avg: 6.849431e-03 stddev: 4.053002e-03 min: 1.190520e-03 max: 1.576531e-02 | | Compute time: 0.162041 | | I/O time: 0.0654004 | | Total time: 0.268252 | +-------------------------------------------------------------------------------+