Commit a17f9005 authored by Illia Kolesnik's avatar Illia Kolesnik
Browse files

Updated docs scripts

parent 407b6f37
Loading
Loading
Loading
Loading
+171 −0
Original line number Diff line number Diff line
%% Cell type:code id: tags:

``` python
import pandas as pd
import matplotlib.pyplot as plt

RESULT_DIR = 'pdfs'
```

%% Cell type:code id: tags:

``` python
def get_grouped_df(filename):
    """Load a benchmark CSV and summarise it per distribution/operation/refines.

    The file is read with ``;`` as the separator and its ``SIZE`` column is
    discarded.  Rows are grouped by ``DISTRIBUTION``, ``OPERATION`` and
    ``REFINES``; for each group the mean, standard deviation, minimum and
    maximum of the ``LINEAR`` and ``CLASSIC`` columns are stored in flat
    columns named ``LINEAR_MEAN`` ... ``CLASSIC_MAX``.  ``SPEEDUP_MEAN`` is
    ``LINEAR_MEAN / CLASSIC_MEAN``.

    Returns the summary DataFrame with the group keys as ordinary columns.
    """
    group_keys = ['DISTRIBUTION', 'OPERATION', 'REFINES']
    statistics = ('mean', 'std', 'min', 'max')

    # Named aggregation produces the flat ``<COLUMN>_<STAT>`` names directly,
    # in the same order as a manual rename of the MultiIndex would.
    aggregations = {
        f'{column}_{stat.upper()}': (column, stat)
        for column in ('LINEAR', 'CLASSIC')
        for stat in statistics
    }

    timings = pd.read_csv(filename, sep=';')
    timings = timings.drop(columns=['SIZE'])
    summary = timings.groupby(group_keys).agg(**aggregations).reset_index()

    # Ratio of the mean linear value to the mean classic value.
    summary['SPEEDUP_MEAN'] = summary['LINEAR_MEAN'] / summary['CLASSIC_MEAN']
    return summary

# Summarise the benchmark results of each implementation (one CSV per implementation).
result_std, result_cuckoo, result_hopscotch = map(
    get_grouped_df,
    ('result_std.csv', 'result_cuckoo.csv', 'result_hopscotch.csv'),
)
```

%% Cell type:code id: tags:

``` python
def plot_compare(df: pd.DataFrame, title, filename):
    """Plot the linear and classic mean timings of ``df`` against REFINES.

    Both series are drawn with error bars taken from the matching ``*_STD``
    column, on a logarithmic x axis.  The figure is saved as
    ``{RESULT_DIR}/{filename}.pdf``, the plotted rows are written next to it
    as ``{filename}.csv`` (``;`` separated, no index), and the figure is
    then shown.
    """
    series = (
        ('LINEAR_MEAN', 'LINEAR_STD', 'Linear'),
        ('CLASSIC_MEAN', 'CLASSIC_STD', 'Classic'),
    )

    fig, ax = plt.subplots()
    for mean_column, std_column, label in series:
        ax.errorbar(df['REFINES'], df[mean_column], yerr=df[std_column], label=label)

    ax.set_xscale('log')
    ax.set_xlabel('Refines')
    ax.set_ylabel('Time (s)')
    ax.set_title(title)
    ax.legend()

    # The figure just created is the current one, so saving via ``fig`` is
    # the same as saving via pyplot.
    fig.savefig(f'{RESULT_DIR}/{filename}.pdf')
    df.to_csv(f'{RESULT_DIR}/{filename}.csv', sep=';', index=False)
    plt.show()

def process_table_impl(df: pd.DataFrame, impl: str):
    """Produce every linear-vs-classic comparison plot for one implementation.

    For each operation (``search``, ``search_all``, ``refinement``) and each
    distribution (``normal``, ``unif``, ``exp``) the matching rows of ``df``
    are handed to ``plot_compare``.  Output files are named
    ``{impl}_{operation tag}_{distribution}``, where the tag for
    ``refinement`` is ``refine``.

    The plots for the ``exp`` distribution are titled "exponential
    distribution"; they used to be mislabelled "uniform distribution".

    Args:
        df: Summary table with ``DISTRIBUTION`` and ``OPERATION`` columns plus
            whatever columns ``plot_compare`` reads.
        impl: Implementation name used as the output file name prefix.
    """
    # (value in the OPERATION column, title prefix, tag used in file names)
    operations = (
        ('search', 'Search in basic directions', 'search'),
        ('search_all', 'Search in all directions', 'search_all'),
        ('refinement', 'Refinement speed', 'refine'),
    )
    # (value in the DISTRIBUTION column, name used in the plot title)
    distributions = (
        ('normal', 'normal'),
        ('unif', 'uniform'),
        ('exp', 'exponential'),
    )

    for operation, heading, file_tag in operations:
        operation_rows = df[df['OPERATION'] == operation]
        for distribution, distribution_name in distributions:
            subset = operation_rows[operation_rows['DISTRIBUTION'] == distribution]
            plot_compare(
                subset,
                f'{heading}, {distribution_name} distribution',
                f'{impl}_{file_tag}_{distribution}',
            )

# Only the hopscotch results are plotted; the std and cuckoo calls are
# currently left commented out.
# process_table_impl(result_std, 'std')
# process_table_impl(result_cuckoo, 'cuckoo')
process_table_impl(result_hopscotch, 'hopscotch')
```

%% Cell type:code id: tags:

``` python
# Compare different distributions
def compare_distributions(df: pd.DataFrame, title, filename):
    """Plot the MEAN timings of the three distributions against REFINES.

    ``df`` must provide ``DISTRIBUTION``, ``REFINES``, ``MEAN`` and ``STD``
    columns.  One error-bar series is drawn per distribution (normal, unif,
    exp) on a logarithmic x axis.  The figure is saved as
    ``{RESULT_DIR}/{filename}.pdf``, the whole of ``df`` is written next to
    it as ``{filename}.csv`` (``;`` separated, no index), and the figure is
    then shown.
    """
    labelled_distributions = (
        ('normal', 'Normal'),
        ('unif', 'Uniform'),
        ('exp', 'Exponential'),
    )
    # Select the rows of every distribution before any figure is created.
    subsets = [
        (df[df['DISTRIBUTION'] == distribution], label)
        for distribution, label in labelled_distributions
    ]

    fig, ax = plt.subplots()
    for rows, label in subsets:
        ax.errorbar(rows['REFINES'], rows['MEAN'], yerr=rows['STD'], label=label)

    ax.set_xscale('log')
    ax.set_xlabel('Refines')
    ax.set_ylabel('Time (s)')
    ax.set_title(title)
    ax.legend()

    fig.savefig(f'{RESULT_DIR}/{filename}.pdf')
    df.to_csv(f'{RESULT_DIR}/{filename}.csv', sep=';', index=False)
    plt.show()

def compare_distributions_impl(df: pd.DataFrame, impl: str):
    """Build the per-operation frames for the distribution comparison plots.

    For each operation (search, search_all, refinement) two frames are
    built: one with the ``CLASSIC_*`` mean/std columns renamed to
    ``MEAN``/``STD`` and one with the ``LINEAR_*`` columns renamed likewise.
    The ``compare_distributions`` calls that would plot them are currently
    commented out, so the function has no visible effect and returns None.
    """
    # Two renamed views of the table: classic or linear statistics exposed
    # under the generic MEAN/STD names.
    classic_view = df.rename(columns={'CLASSIC_MEAN': 'MEAN', 'CLASSIC_STD': 'STD'})
    linear_view = df.rename(columns={'LINEAR_MEAN': 'MEAN', 'LINEAR_STD': 'STD'})

    # Row masks, one per operation (renaming columns leaves the index intact).
    is_search = df['OPERATION'] == 'search'
    is_search_all = df['OPERATION'] == 'search_all'
    is_refinement = df['OPERATION'] == 'refinement'

    search_classic = classic_view[is_search]
    search_linear = linear_view[is_search]
    search_all_classic = classic_view[is_search_all]
    search_all_linear = linear_view[is_search_all]
    refine_classic = classic_view[is_refinement]
    refine_linear = linear_view[is_refinement]

    # compare_distributions(search_classic, f'Search in basic directions, classic octree', f'{impl}_search_classic')
    # compare_distributions(search_all_classic, f'Search in all directions, classic octree', f'{impl}_search_all_classic')
    # compare_distributions(refine_classic, f'Refinement speed, classic octree', f'{impl}_refine_classic')

    # compare_distributions(search_linear, f'Search in basic directions, linear octree', f'{impl}_search_linear')
    # compare_distributions(search_all_linear, f'Search in all directions, linear octree', f'{impl}_search_all_linear')
    # compare_distributions(refine_linear, f'Refinement speed, linear octree', f'{impl}_refine_linear')

# compare_distributions_impl(result_std, 'std')
# compare_distributions_impl(result_cuckoo, 'cuckoo')
# compare_distributions_impl(result_hopscotch, 'hopscotch')
```

%% Cell type:code id: tags:

``` python
# Compare different implementations
def compare_implementations(std: pd.DataFrame, cuckoo: pd.DataFrame, hopscotch: pd.DataFrame, title, filename):
    """Plot the LINEAR_MEAN timings of the three implementations against REFINES.

    Each frame contributes one error-bar series (error bars from
    ``LINEAR_STD``) on a logarithmic x axis.  The figure is saved as
    ``{RESULT_DIR}/{filename}.pdf`` and then shown; no CSV is written.
    """
    labelled_frames = (
        (std, 'Standard'),
        (cuckoo, 'Cuckoo'),
        (hopscotch, 'Hopscotch'),
    )

    fig, ax = plt.subplots()
    for frame, label in labelled_frames:
        ax.errorbar(frame['REFINES'], frame['LINEAR_MEAN'], yerr=frame['LINEAR_STD'], label=label)

    ax.set_xscale('log')
    ax.set_xlabel('Refines')
    ax.set_ylabel('Time (s)')
    ax.set_title(title)
    ax.legend()

    fig.savefig(f'{RESULT_DIR}/{filename}.pdf')
    plt.show()

# Pre-filter each implementation's summary table: first by distribution,
# then by operation within every distribution.  The resulting frames are the
# arguments of the (currently commented out) compare_implementations calls.

# Standard implementation: per distribution, then per operation.
normal_std = result_std[(result_std['DISTRIBUTION'] == 'normal')]
unif_std = result_std[(result_std['DISTRIBUTION'] == 'unif')]
exp_std = result_std[(result_std['DISTRIBUTION'] == 'exp')]
normal_std_search = normal_std[(normal_std['OPERATION'] == 'search')]
unif_std_search = unif_std[(unif_std['OPERATION'] == 'search')]
exp_std_search = exp_std[(exp_std['OPERATION'] == 'search')]
normal_std_search_all = normal_std[(normal_std['OPERATION'] == 'search_all')]
unif_std_search_all = unif_std[(unif_std['OPERATION'] == 'search_all')]
exp_std_search_all = exp_std[(exp_std['OPERATION'] == 'search_all')]
normal_std_refine = normal_std[(normal_std['OPERATION'] == 'refinement')]
unif_std_refine = unif_std[(unif_std['OPERATION'] == 'refinement')]
exp_std_refine = exp_std[(exp_std['OPERATION'] == 'refinement')]


# Cuckoo implementation: per distribution, then per operation.
normal_cuckoo = result_cuckoo[(result_cuckoo['DISTRIBUTION'] == 'normal')]
unif_cuckoo = result_cuckoo[(result_cuckoo['DISTRIBUTION'] == 'unif')]
exp_cuckoo = result_cuckoo[(result_cuckoo['DISTRIBUTION'] == 'exp')]
normal_cuckoo_search = normal_cuckoo[(normal_cuckoo['OPERATION'] == 'search')]
unif_cuckoo_search = unif_cuckoo[(unif_cuckoo['OPERATION'] == 'search')]
exp_cuckoo_search = exp_cuckoo[(exp_cuckoo['OPERATION'] == 'search')]
normal_cuckoo_search_all = normal_cuckoo[(normal_cuckoo['OPERATION'] == 'search_all')]
unif_cuckoo_search_all = unif_cuckoo[(unif_cuckoo['OPERATION'] == 'search_all')]
exp_cuckoo_search_all = exp_cuckoo[(exp_cuckoo['OPERATION'] == 'search_all')]
normal_cuckoo_refine = normal_cuckoo[(normal_cuckoo['OPERATION'] == 'refinement')]
unif_cuckoo_refine = unif_cuckoo[(unif_cuckoo['OPERATION'] == 'refinement')]
exp_cuckoo_refine = exp_cuckoo[(exp_cuckoo['OPERATION'] == 'refinement')]

# Hopscotch implementation: per distribution, then per operation.
normal_hopscotch = result_hopscotch[(result_hopscotch['DISTRIBUTION'] == 'normal')]
unif_hopscotch = result_hopscotch[(result_hopscotch['DISTRIBUTION'] == 'unif')]
exp_hopscotch = result_hopscotch[(result_hopscotch['DISTRIBUTION'] == 'exp')]
normal_hopscotch_search = normal_hopscotch[(normal_hopscotch['OPERATION'] == 'search')]
unif_hopscotch_search = unif_hopscotch[(unif_hopscotch['OPERATION'] == 'search')]
exp_hopscotch_search = exp_hopscotch[(exp_hopscotch['OPERATION'] == 'search')]
normal_hopscotch_search_all = normal_hopscotch[(normal_hopscotch['OPERATION'] == 'search_all')]
unif_hopscotch_search_all = unif_hopscotch[(unif_hopscotch['OPERATION'] == 'search_all')]
exp_hopscotch_search_all = exp_hopscotch[(exp_hopscotch['OPERATION'] == 'search_all')]
normal_hopscotch_refine = normal_hopscotch[(normal_hopscotch['OPERATION'] == 'refinement')]
unif_hopscotch_refine = unif_hopscotch[(unif_hopscotch['OPERATION'] == 'refinement')]
exp_hopscotch_refine = exp_hopscotch[(exp_hopscotch['OPERATION'] == 'refinement')]
# TODO: add classic (translated from Czech "pridat klasicky"; presumably the
# classic timings should be added to these comparisons -- confirm with the author)
# compare_implementations(normal_std_search, normal_cuckoo_search, normal_hopscotch_search, 'Search in basic directions, normal distribution', 'compare_search_normal')
# compare_implementations(unif_std_search, unif_cuckoo_search, unif_hopscotch_search, 'Search in basic directions, uniform distribution', 'compare_search_unif')
# compare_implementations(exp_std_search, exp_cuckoo_search, exp_hopscotch_search, 'Search in basic directions, exponential distribution', 'compare_search_exp')

# compare_implementations(normal_std_search_all, normal_cuckoo_search_all, normal_hopscotch_search_all, 'Search in all directions, normal distribution', 'compare_search_all_normal')
# compare_implementations(unif_std_search_all, unif_cuckoo_search_all, unif_hopscotch_search_all, 'Search in all directions, uniform distribution', 'compare_search_all_unif')
# compare_implementations(exp_std_search_all, exp_cuckoo_search_all, exp_hopscotch_search_all, 'Search in all directions, exponential distribution', 'compare_search_all_exp')

# compare_implementations(normal_std_refine, normal_cuckoo_refine, normal_hopscotch_refine, 'Refinement speed, normal distribution', 'compare_refine_normal')
# compare_implementations(unif_std_refine, unif_cuckoo_refine, unif_hopscotch_refine, 'Refinement speed, uniform distribution', 'compare_refine_unif')
# compare_implementations(exp_std_refine, exp_cuckoo_refine, exp_hopscotch_refine, 'Refinement speed, exponential distribution', 'compare_refine_exp')
```

Benchmark/docs_seq_refine.py

deleted100755 → 0
+0 −85
Original line number Diff line number Diff line
#!/usr/bin/env python3

import os
import shutil
import sys
import pandas as pd
import matplotlib.pyplot as plt

# Exactly two arguments are expected: the input CSV and the output directory.
if len(sys.argv) != 3:
    print("Usage: ./script <input_csv> <output_dir>")
    # sys.exit is always available; the bare exit() helper is only injected
    # by the site module and is not guaranteed to exist.
    sys.exit(1)

input_csv = sys.argv[1]
output_dir = sys.argv[2]

# Start from an empty output directory: an existing one is removed together
# with everything inside it.
if os.path.exists(output_dir):
    shutil.rmtree(output_dir)
os.makedirs(output_dir)

# Read the benchmark table.  Expected columns:
# OPERATION;REFINES;SIZE;DISTRIBUTION;LINEAR;CLASSIC
# REFINES is not used by this script, so it is dropped right away.
data = pd.read_csv(input_csv, delimiter=';').drop(columns=['REFINES'])

# Keep only the rows that measure the 'refinement' operation.
refinement_data = data.loc[data['OPERATION'] == 'refinement']

# One set of plots is produced per distribution found in those rows.
distributions = refinement_data['DISTRIBUTION'].unique()

# Loop through each distribution and create a plot
for dist in distributions:
    # Check if the directory exists, remove it if it does
    result_path = f"{output_dir}/{dist}"
    os.makedirs(result_path)

    # Filter data for the current distribution
    dist_data = refinement_data[refinement_data['DISTRIBUTION'] == dist]
    dist_data = dist_data.drop(columns=['DISTRIBUTION', 'OPERATION'])

    # Calculate the speedup
    dist_data['SPEEDUP'] = dist_data['LINEAR'] / dist_data['CLASSIC']

    # Calc avg, min, max for each size
    dist_data = dist_data.groupby('SIZE').agg(
        {
            'LINEAR': ['mean', 'min', 'max'],
            'CLASSIC': ['mean', 'min', 'max'],
            'SPEEDUP': ['mean', 'min', 'max']
        }
    )

    # Rename the columns
    dist_data.columns = [
        'LINEAR_AVG', 'LINEAR_MIN', 'LINEAR_MAX',
        'CLASSIC_AVG', 'CLASSIC_MIN', 'CLASSIC_MAX',
        'SPEEDUP_AVG', 'SPEEDUP_MIN', 'SPEEDUP_MAX'
    ]

    # Plotting times
    plt.plot(dist_data['LINEAR_AVG'], label='Linear', marker='o')
    plt.plot(dist_data['CLASSIC_AVG'], label='Classic', marker='x')
    plt.title(f"{dist} distribution")
    plt.xlabel('Size')
    plt.ylabel('Time (s)')
    plt.legend()
    plt.grid(True)
    plt.savefig(f"{result_path}/times.pdf")
    plt.close()

    # Plotting speedup, one is speedup, other is 1 for reference
    plt.plot(dist_data['SPEEDUP_AVG'], label='Speedup', marker='o')

    plt.title(f"{dist} distribution, speedup")
    plt.xlabel('Size')
    plt.ylabel('Speedup')
    plt.legend()
    plt.grid(True)
    plt.savefig(f"{result_path}/speedup.pdf")
    plt.close()

    # Save the data to csv
    dist_data.to_csv(f"{result_path}/data.csv", sep=';')

Benchmark/docs_seq_search.py

deleted100755 → 0
+0 −120
Original line number Diff line number Diff line
#!/usr/bin/env python3

import pandas as pd
import sys
import matplotlib.pyplot as plt
import os
import shutil


def group_by_size(df: pd.DataFrame) -> pd.DataFrame:
    """Aggregate the timing rows of ``df`` per ``SIZE``.

    For each distinct ``SIZE`` the mean, minimum and maximum of the
    ``LINEAR``, ``CLASSIC`` and ``SPEEDUP`` columns are computed and stored
    as ``<COLUMN>_AVG``, ``<COLUMN>_MIN`` and ``<COLUMN>_MAX``.

    Returns a new DataFrame in which ``SIZE`` is an ordinary column again,
    followed by the nine aggregated columns.
    """
    statistics = (("AVG", "mean"), ("MIN", "min"), ("MAX", "max"))

    # Named aggregation: output column name -> (source column, function).
    aggregations = {
        f"{column}_{suffix}": (column, function)
        for column in ("LINEAR", "CLASSIC", "SPEEDUP")
        for suffix, function in statistics
    }

    per_size = df.groupby("SIZE").agg(**aggregations)

    # Turn the SIZE index back into a regular column.
    return per_size.reset_index()


# Exactly two arguments are expected: the input CSV and the output directory.
if len(sys.argv) != 3:
    print("Usage: ./script <input_csv> <output_dir>")
    # sys.exit is always available; the bare exit() helper is only injected
    # by the site module and is not guaranteed to exist.
    sys.exit(1)

input_csv = sys.argv[1]
output_dir = sys.argv[2]

# Start from an empty output directory: an existing one is removed together
# with everything inside it.
if os.path.exists(output_dir):
    shutil.rmtree(output_dir)
os.makedirs(output_dir)

# Expected input columns:
# OPERATION;REFINES;SIZE;DISTRIBUTION;LINEAR;CLASSIC
df = pd.read_csv(input_csv, sep=";")

# Per-row ratio of the linear time to the classic time.
df["SPEEDUP"] = df["LINEAR"].div(df["CLASSIC"])

# Distinct distributions and refine counts present in the input.
distributions = df["DISTRIBUTION"].unique()
refines = df["REFINES"].unique()

# Every operation except "refinement" is plotted by this script.
operations = [
    name for name in df["OPERATION"].unique() if name != "refinement"]

# For every (distribution, refines) pair create a result directory and, for
# each operation, write a timing plot, a speedup plot and the aggregated CSV.
for distribution in distributions:
    in_distribution = df.loc[df["DISTRIBUTION"] == distribution]

    for refine in refines:
        # The output directory was recreated empty earlier in this script,
        # so these directories are always new.
        result_dir = f"{output_dir}/{distribution}_{refine}"
        os.makedirs(result_dir)
        for subdir in ("times", "speedup", "csv"):
            os.makedirs(f"{result_dir}/{subdir}")

        # Rows of this distribution with this refine count; the selection
        # is shared by all operations below.
        selection = in_distribution.loc[in_distribution["REFINES"] == refine]

        for operation in operations:
            rows = selection.loc[selection["OPERATION"] == operation]
            title = f"{operation}, {refine} refines, {distribution} distribution"

            # Aggregate per SIZE and order by SIZE for plotting.
            all_df = group_by_size(rows[["SIZE", "LINEAR", "CLASSIC", "SPEEDUP"]])
            all_df = all_df.sort_values(by=["SIZE"])

            # Average times of both variants, saved as PDF.
            all_df.plot(x="SIZE", y=["LINEAR_AVG", "CLASSIC_AVG"], marker="o")
            plt.xlabel("Size")
            plt.ylabel("Time (s)")
            plt.title(title)
            plt.legend(["Linear", "Classic"])
            plt.savefig(f"{result_dir}/times/{operation}.pdf")
            plt.close()

            # Average speedup plus a dashed reference line at y = 1.
            all_df.plot(x="SIZE", y=["SPEEDUP_AVG"], marker="o")
            size_range = [all_df["SIZE"].min(), all_df["SIZE"].max()]
            plt.plot(size_range, [1, 1], color="red", linestyle="dashed")
            plt.xlabel("Size")
            plt.ylabel("Speedup, times")
            plt.title(title)
            plt.legend(["Speedup, linear/classic"])
            plt.grid(True)
            plt.savefig(f"{result_dir}/speedup/{operation}.pdf")
            plt.close()

            # Store the aggregated table next to the plots.
            all_df.to_csv(f"{result_dir}/csv/{operation}.csv", index=False)