Commit 919940d6 authored by Illia Kolesnik's avatar Illia Kolesnik
Browse files

Added new doc scripts

parent fd781503
Loading
Loading
Loading
Loading
+85 −0
Original line number Diff line number Diff line
#!/usr/bin/env python3

import os
import shutil
import sys
import pandas as pd
import matplotlib.pyplot as plt

def summarize_by_size(rows):
    """Aggregate timing rows into per-SIZE mean/min/max statistics.

    A SPEEDUP column is derived as LINEAR / CLASSIC for every row, then
    LINEAR, CLASSIC and SPEEDUP are each reduced to mean, min and max per
    distinct SIZE value.

    Args:
        rows: DataFrame holding at least the SIZE, LINEAR and CLASSIC
            columns. Any other columns are ignored. The input is not
            modified.

    Returns:
        DataFrame indexed by SIZE with the columns LINEAR_AVG, LINEAR_MIN,
        LINEAR_MAX, CLASSIC_AVG, CLASSIC_MIN, CLASSIC_MAX, SPEEDUP_AVG,
        SPEEDUP_MIN and SPEEDUP_MAX.
    """
    # assign() returns a new frame, so the caller's (possibly sliced)
    # DataFrame is never written to.
    with_speedup = rows.assign(SPEEDUP=rows['LINEAR'] / rows['CLASSIC'])

    summary = with_speedup.groupby('SIZE').agg(
        {
            'LINEAR': ['mean', 'min', 'max'],
            'CLASSIC': ['mean', 'min', 'max'],
            'SPEEDUP': ['mean', 'min', 'max'],
        }
    )

    # Flatten the (column, statistic) MultiIndex produced by agg(); the
    # order below mirrors the order of the aggregation dict above.
    summary.columns = [
        'LINEAR_AVG', 'LINEAR_MIN', 'LINEAR_MAX',
        'CLASSIC_AVG', 'CLASSIC_MIN', 'CLASSIC_MAX',
        'SPEEDUP_AVG', 'SPEEDUP_MIN', 'SPEEDUP_MAX',
    ]
    return summary


def plot_times(summary, dist, result_path):
    """Save the average LINEAR and CLASSIC times per size as times.pdf."""
    # The summary is indexed by SIZE, so the index supplies the x axis.
    plt.plot(summary['LINEAR_AVG'], label='Linear', marker='o')
    plt.plot(summary['CLASSIC_AVG'], label='Classic', marker='x')
    plt.title(f"{dist} distribution")
    plt.xlabel('Size')
    plt.ylabel('Time (s)')
    plt.legend()
    plt.grid(True)
    plt.savefig(os.path.join(result_path, "times.pdf"))
    plt.close()


def plot_speedup(summary, dist, result_path):
    """Save the average speedup per size as speedup.pdf."""
    plt.plot(summary['SPEEDUP_AVG'], label='Speedup', marker='o')
    # Horizontal line at 1 marks the point where LINEAR and CLASSIC
    # take the same time, as a visual reference.
    plt.axhline(y=1, color='gray', linestyle='--', label='Reference (1x)')
    plt.title(f"{dist} distribution, speedup")
    plt.xlabel('Size')
    plt.ylabel('Speedup')
    plt.legend()
    plt.grid(True)
    plt.savefig(os.path.join(result_path, "speedup.pdf"))
    plt.close()


def main(argv=None):
    """Generate per-distribution plots and tables for 'refinement' rows.

    Args:
        argv: Argument vector in the form [script, input_csv, output_dir].
            Defaults to sys.argv.

    Returns:
        Process exit code: 0 on success, 1 when the arguments are wrong.
    """
    argv = sys.argv if argv is None else argv
    if len(argv) != 3:
        print("Usage: ./script <input_csv> <output_dir>")
        return 1

    input_csv = argv[1]
    output_dir = argv[2]

    # Start from an empty output directory: anything left over from a
    # previous run is deleted.
    if os.path.exists(output_dir):
        shutil.rmtree(output_dir)
    os.makedirs(output_dir)

    # Load the data
    # OPERATION;REFINES;SIZE;DISTRIBUTION;LINEAR;CLASSIC
    data = pd.read_csv(input_csv, delimiter=';')

    # REFINES is not used by this report
    data = data.drop(columns=['REFINES'])

    # Only the 'refinement' operation is reported
    refinement_data = data[data['OPERATION'] == 'refinement']

    for dist in refinement_data['DISTRIBUTION'].unique():
        # One sub-directory per distribution; output_dir was just
        # recreated, so the sub-directory cannot exist yet.
        result_path = os.path.join(output_dir, str(dist))
        os.makedirs(result_path)

        dist_rows = refinement_data[refinement_data['DISTRIBUTION'] == dist]
        dist_rows = dist_rows.drop(columns=['DISTRIBUTION', 'OPERATION'])

        summary = summarize_by_size(dist_rows)

        plot_times(summary, dist, result_path)
        plot_speedup(summary, dist, result_path)

        # Save the aggregated data; SIZE is written as the index column
        summary.to_csv(os.path.join(result_path, "data.csv"), sep=';')

    return 0


if __name__ == "__main__":
    sys.exit(main())
+18 −28
Original line number Diff line number Diff line
@@ -13,26 +13,25 @@ def group_by_size(df: pd.DataFrame) -> pd.DataFrame:
    # LINEAR_MEAN, LINEAR_MIN, LINEAR_MAX
    # CLASSIC_MEAN, CLASSIC_MIN, CLASSIC_MAX
    # SPEEDUP_MEAN, SPEEDUP_MIN, SPEEDUP_MAX
    # But keep SIZE column!
    df = df.groupby("SIZE").agg(
        {
            "LINEAR": ["mean", "min", "max"],
            "CLASSIC": ["mean", "min", "max"],
            "SPEEDUP": ["mean", "min", "max"],
            "SPEEDUP": ["mean", "min", "max"]
        }
    )

    # Rename columns
    df.columns = [
        "LINEAR_AVG",
        "LINEAR_MIN",
        "LINEAR_MAX",
        "CLASSIC_AVG",
        "CLASSIC_MIN",
        "CLASSIC_MAX",
        "SPEEDUP_AVG",
        "SPEEDUP_MIN",
        "SPEEDUP_MAX",
        "LINEAR_AVG", "LINEAR_MIN", "LINEAR_MAX",
        "CLASSIC_AVG", "CLASSIC_MIN", "CLASSIC_MAX",
        "SPEEDUP_AVG", "SPEEDUP_MIN", "SPEEDUP_MAX"
    ]
    df = df.reset_index()

    df = df.reset_index()  # Reset index, so SIZE is a column again
    # print(df)

    return df


@@ -43,11 +42,8 @@ if len(sys.argv) != 3:
input_csv = sys.argv[1]
output_dir = sys.argv[2]

TITLE_FORMAT = "{operation}, {refine} refines, {distribution} distribution"
DIRNAME_FORMAT = "{distribution}_{refine}"

# If create dir doesn't exist, create it
if not os.path.exists(output_dir):
if os.path.exists(output_dir):
    shutil.rmtree(output_dir)
os.makedirs(output_dir)

# CSV columns:
@@ -73,16 +69,12 @@ operations = [
for distribution in distributions:
    for refine in refines:
        # If directory exists, recreate it, even if it's empty
        dirname = DIRNAME_FORMAT.format(
            refine=refine, distribution=distribution
        )
        dirname = f"{distribution}_{refine}"
        result_dir = output_dir + "/" + dirname
        if os.path.exists(result_dir):
            shutil.rmtree(result_dir)
        os.makedirs(result_dir)
        os.makedirs(result_dir + "/times")
        os.makedirs(result_dir + "/speedup")
        os.makedirs(result_dir + "/txt")
        os.makedirs(result_dir + "/csv")

        for operation in operations:
            # Filter by distribution, refine, operation
@@ -90,9 +82,7 @@ for distribution in distributions:
            filtered = filtered.loc[filtered["REFINES"] == refine]
            filtered = filtered.loc[filtered["OPERATION"] == operation]

            title = TITLE_FORMAT.format(
                operation=operation, refine=refine, distribution=distribution
            )
            title = f"{operation}, {refine} refines, {distribution} distribution"

            # Create dataframes
            all_df = filtered[["SIZE", "LINEAR", "CLASSIC", "SPEEDUP"]]
@@ -122,9 +112,9 @@ for distribution in distributions:
            plt.ylabel("Speedup, times")
            plt.title(title)
            plt.legend(["Speedup, linear/classic"])
            plt.grid(True)
            plt.savefig(f"{result_dir}/speedup/{operation}.pdf")
            plt.close()

            # Save dataframe as table to file
            with open(f"{result_dir}/txt/{operation}.txt", "w") as f:
                f.write(all_df.to_string(index=False))
            all_df.to_csv(f"{result_dir}/csv/{operation}.csv", index=False)