#!/usr/bin/env python3
"""Plot benchmark results for the 'refinement' operation.

Reads a semicolon-separated CSV (columns:
OPERATION;REFINES;SIZE;DISTRIBUTION;LINEAR;CLASSIC) and, for every
distribution, writes a times plot, a speedup plot and an aggregated CSV
into <output_dir>/<distribution>/.
"""
import os
import shutil
import sys

import pandas as pd
import matplotlib.pyplot as plt

if len(sys.argv) != 3:
    print("Usage: ./script <input_csv> <output_dir>")
    sys.exit(1)  # sys.exit, not the REPL-only builtin exit()

input_csv = sys.argv[1]
output_dir = sys.argv[2]

# Recreate the output directory from scratch on every run.
if os.path.exists(output_dir):
    shutil.rmtree(output_dir)
os.makedirs(output_dir)

# Load the data.
# CSV columns: OPERATION;REFINES;SIZE;DISTRIBUTION;LINEAR;CLASSIC
data = pd.read_csv(input_csv, delimiter=';')

# REFINES is irrelevant for the refinement benchmark.
data = data.drop(columns=['REFINES'])

# Keep only the rows measuring the 'refinement' operation.
refinement_data = data[data['OPERATION'] == 'refinement']

# One plot set per distribution.
distributions = refinement_data['DISTRIBUTION'].unique()

for dist in distributions:
    # output_dir was just recreated, so this path cannot pre-exist.
    result_path = f"{output_dir}/{dist}"
    os.makedirs(result_path)

    # .copy() so the SPEEDUP assignment below writes to an independent
    # frame instead of a view (avoids pandas' SettingWithCopyWarning).
    dist_data = refinement_data[refinement_data['DISTRIBUTION'] == dist].copy()
    dist_data = dist_data.drop(columns=['DISTRIBUTION', 'OPERATION'])

    # Speedup of the linear variant relative to the classic one.
    dist_data['SPEEDUP'] = dist_data['LINEAR'] / dist_data['CLASSIC']

    # Aggregate avg/min/max per input size.
    dist_data = dist_data.groupby('SIZE').agg(
        {
            'LINEAR': ['mean', 'min', 'max'],
            'CLASSIC': ['mean', 'min', 'max'],
            'SPEEDUP': ['mean', 'min', 'max'],
        }
    )

    # Flatten the MultiIndex produced by .agg into plain column names.
    dist_data.columns = [
        'LINEAR_AVG', 'LINEAR_MIN', 'LINEAR_MAX',
        'CLASSIC_AVG', 'CLASSIC_MIN', 'CLASSIC_MAX',
        'SPEEDUP_AVG', 'SPEEDUP_MIN', 'SPEEDUP_MAX',
    ]

    # Plotting times.
    plt.plot(dist_data['LINEAR_AVG'], label='Linear', marker='o')
    plt.plot(dist_data['CLASSIC_AVG'], label='Classic', marker='x')
    plt.title(f"{dist} distribution")
    plt.xlabel('Size')
    plt.ylabel('Time (s)')
    plt.legend()
    plt.grid(True)
    plt.savefig(f"{result_path}/times.pdf")
    plt.close()

    # Plotting speedup, one is speedup, other is 1 for reference.
    # (The original code never drew the promised reference line.)
    plt.plot(dist_data['SPEEDUP_AVG'], label='Speedup', marker='o')
    plt.axhline(y=1, color='gray', linestyle='--', label='Reference (1x)')
    plt.title(f"{dist} distribution, speedup")
    plt.xlabel('Size')
    plt.ylabel('Speedup')
    plt.legend()
    plt.grid(True)
    plt.savefig(f"{result_path}/speedup.pdf")
    plt.close()

    # Save the aggregated data alongside the plots.
    dist_data.to_csv(f"{result_path}/data.csv", sep=';')
"/times") os.makedirs(result_dir + "/speedup") os.makedirs(result_dir + "/txt") os.makedirs(result_dir + "/csv") for operation in operations: # Filter by distribution, refine, operation Loading @@ -90,9 +82,7 @@ for distribution in distributions: filtered = filtered.loc[filtered["REFINES"] == refine] filtered = filtered.loc[filtered["OPERATION"] == operation] title = TITLE_FORMAT.format( operation=operation, refine=refine, distribution=distribution ) title = f"{operation}, {refine} refines, {distribution} distribution" # Create dataframes all_df = filtered[["SIZE", "LINEAR", "CLASSIC", "SPEEDUP"]] Loading Loading @@ -122,9 +112,9 @@ for distribution in distributions: plt.ylabel("Speedup, times") plt.title(title) plt.legend(["Speedup, linear/classic"]) plt.grid(True) plt.savefig(f"{result_dir}/speedup/{operation}.pdf") plt.close() # Save dataframe as table to file with open(f"{result_dir}/txt/{operation}.txt", "w") as f: f.write(all_df.to_string(index=False)) all_df.to_csv(f"{result_dir}/csv/{operation}.csv", index=False) Loading
#!/usr/bin/env python3
"""Benchmark post-processing for the 'refinement' operation.

Usage: ./script <input_csv> <output_dir>

For each DISTRIBUTION found in the input CSV
(OPERATION;REFINES;SIZE;DISTRIBUTION;LINEAR;CLASSIC) this script emits
times.pdf, speedup.pdf and data.csv under <output_dir>/<distribution>/.
"""
import os
import shutil
import sys

import pandas as pd
import matplotlib.pyplot as plt

if len(sys.argv) != 3:
    print("Usage: ./script <input_csv> <output_dir>")
    sys.exit(1)  # proper script exit; builtin exit() is REPL-only

input_csv = sys.argv[1]
output_dir = sys.argv[2]

# Start from an empty output tree.
if os.path.exists(output_dir):
    shutil.rmtree(output_dir)
os.makedirs(output_dir)

# CSV columns: OPERATION;REFINES;SIZE;DISTRIBUTION;LINEAR;CLASSIC
frame = pd.read_csv(input_csv, delimiter=';')

# The REFINES column plays no role in this analysis.
frame = frame.drop(columns=['REFINES'])

# Restrict to the 'refinement' operation rows.
refinement_rows = frame[frame['OPERATION'] == 'refinement']

for dist in refinement_rows['DISTRIBUTION'].unique():
    # Fresh per-distribution directory (output_dir is newly created).
    result_path = f"{output_dir}/{dist}"
    os.makedirs(result_path)

    # Work on an explicit copy: assigning SPEEDUP to a slice of
    # refinement_rows would raise SettingWithCopyWarning.
    per_dist = refinement_rows[refinement_rows['DISTRIBUTION'] == dist].copy()
    per_dist = per_dist.drop(columns=['DISTRIBUTION', 'OPERATION'])

    # Speedup = linear time over classic time.
    per_dist['SPEEDUP'] = per_dist['LINEAR'] / per_dist['CLASSIC']

    # Per-size aggregation: average, minimum and maximum.
    per_dist = per_dist.groupby('SIZE').agg(
        {
            'LINEAR': ['mean', 'min', 'max'],
            'CLASSIC': ['mean', 'min', 'max'],
            'SPEEDUP': ['mean', 'min', 'max'],
        }
    )

    # Replace the (column, stat) MultiIndex with flat names.
    per_dist.columns = [
        'LINEAR_AVG', 'LINEAR_MIN', 'LINEAR_MAX',
        'CLASSIC_AVG', 'CLASSIC_MIN', 'CLASSIC_MAX',
        'SPEEDUP_AVG', 'SPEEDUP_MIN', 'SPEEDUP_MAX',
    ]

    # Plotting times.
    plt.plot(per_dist['LINEAR_AVG'], label='Linear', marker='o')
    plt.plot(per_dist['CLASSIC_AVG'], label='Classic', marker='x')
    plt.title(f"{dist} distribution")
    plt.xlabel('Size')
    plt.ylabel('Time (s)')
    plt.legend()
    plt.grid(True)
    plt.savefig(f"{result_path}/times.pdf")
    plt.close()

    # Plotting speedup, one is speedup, other is 1 for reference.
    # The y=1 line was promised by this comment but missing in the code.
    plt.plot(per_dist['SPEEDUP_AVG'], label='Speedup', marker='o')
    plt.axhline(y=1, color='gray', linestyle='--', label='Reference (1x)')
    plt.title(f"{dist} distribution, speedup")
    plt.xlabel('Size')
    plt.ylabel('Speedup')
    plt.legend()
    plt.grid(True)
    plt.savefig(f"{result_path}/speedup.pdf")
    plt.close()

    # Save the aggregated numbers next to the plots.
    per_dist.to_csv(f"{result_path}/data.csv", sep=';')
Benchmark/docs_seq_search.py +18 −28 Original line number Diff line number Diff line Loading @@ -13,26 +13,25 @@ def group_by_size(df: pd.DataFrame) -> pd.DataFrame: # LINEAR_MEAN, LINEAR_MIN, LINEAR_MAX # CLASSIC_MEAN, CLASSIC_MIN, CLASSIC_MAX # SPEEDUP_MEAN, SPEEDUP_MIN, SPEEDUP_MAX # But keep SIZE column! df = df.groupby("SIZE").agg( { "LINEAR": ["mean", "min", "max"], "CLASSIC": ["mean", "min", "max"], "SPEEDUP": ["mean", "min", "max"], "SPEEDUP": ["mean", "min", "max"] } ) # Rename columns df.columns = [ "LINEAR_AVG", "LINEAR_MIN", "LINEAR_MAX", "CLASSIC_AVG", "CLASSIC_MIN", "CLASSIC_MAX", "SPEEDUP_AVG", "SPEEDUP_MIN", "SPEEDUP_MAX", "LINEAR_AVG", "LINEAR_MIN", "LINEAR_MAX", "CLASSIC_AVG", "CLASSIC_MIN", "CLASSIC_MAX", "SPEEDUP_AVG", "SPEEDUP_MIN", "SPEEDUP_MAX" ] df = df.reset_index() df = df.reset_index() # Reset index, so SIZE is a column again # print(df) return df Loading @@ -43,11 +42,8 @@ if len(sys.argv) != 3: input_csv = sys.argv[1] output_dir = sys.argv[2] TITLE_FORMAT = "{operation}, {refine} refines, {distribution} distribution" DIRNAME_FORMAT = "{distribution}_{refine}" # If create dir doesn't exist, create it if not os.path.exists(output_dir): if os.path.exists(output_dir): shutil.rmtree(output_dir) os.makedirs(output_dir) # CSV columns: Loading @@ -73,16 +69,12 @@ operations = [ for distribution in distributions: for refine in refines: # If directory exists, recreate it, even if it's empty dirname = DIRNAME_FORMAT.format( refine=refine, distribution=distribution ) dirname = f"{distribution}_{refine}" result_dir = output_dir + "/" + dirname if os.path.exists(result_dir): shutil.rmtree(result_dir) os.makedirs(result_dir) os.makedirs(result_dir + "/times") os.makedirs(result_dir + "/speedup") os.makedirs(result_dir + "/txt") os.makedirs(result_dir + "/csv") for operation in operations: # Filter by distribution, refine, operation Loading @@ -90,9 +82,7 @@ for distribution in distributions: filtered = filtered.loc[filtered["REFINES"] == refine] 
filtered = filtered.loc[filtered["OPERATION"] == operation] title = TITLE_FORMAT.format( operation=operation, refine=refine, distribution=distribution ) title = f"{operation}, {refine} refines, {distribution} distribution" # Create dataframes all_df = filtered[["SIZE", "LINEAR", "CLASSIC", "SPEEDUP"]] Loading Loading @@ -122,9 +112,9 @@ for distribution in distributions: plt.ylabel("Speedup, times") plt.title(title) plt.legend(["Speedup, linear/classic"]) plt.grid(True) plt.savefig(f"{result_dir}/speedup/{operation}.pdf") plt.close() # Save dataframe as table to file with open(f"{result_dir}/txt/{operation}.txt", "w") as f: f.write(all_df.to_string(index=False)) all_df.to_csv(f"{result_dir}/csv/{operation}.csv", index=False)