Commit 17346f74 authored by Radek Galabov's avatar Radek Galabov
Browse files

Scripts for Pulse Wave Velocity added. Not finished.

parent decbcf53
Loading
Loading
Loading
Loading
+151 −0
Original line number Original line Diff line number Diff line
import numpy as np
import os
import glob
import matplotlib.pyplot as plt
import scipy.fft as spfft

# constants
FLUX_DIR = (r"C:\Users\galr\Desktop\pacienti_rozpracovani\G318_patient62_bodyPart2_visit0_autom_manual\flux_sampPerc100")
PADDING = 10 #10000
DEGREE_OF_NOISE = 0*0.05 # percent of maximum value
RANGE_OF_FOURIER_COEFFS = [6, 7] # consider 6 to 12 coeffs
LIST_OF_REF_POINTS = [600] # 600
SUBFOLDER_NAME = "integrace_prutok"
CENTERLINE_FILENAME = "centerline_with_data_time=0.vtk"

########################################################################

def periodic_corr(x, y):
    """Periodic correlation, implemented using the FFT.

    x and y must be real sequences with the same length.
    """
    return np.fft.ifft(np.fft.fft(x) * np.fft.fft(y).conj()).real

def lag_finder(y1, y2, sr):
    '''Find lag between vector y1 and y2. They
    are sampled with sampling ratio sr.
    '''
    n = len(y1)

    corr = (periodic_corr(y2, y1)
            /(np.sqrt(periodic_corr(y1, y1)[int(n/2)]
            * periodic_corr(y2, y2)[int(n/2)])))

    delay_arr = np.linspace(-0.5*n/sr, 0.5*n/sr, n)
    delay = delay_arr[np.argmax(corr)]

    #plt.figure()
    #plt.plot(delay_arr, corr)
    #plt.title('Lag: ' + str(np.round(delay, 3)) + ' s')
    #plt.xlabel('Lag')
    #plt.ylabel('Correlation coeff')
    #plt.plot(y2)
    #plt.plot(y1)
    #plt.show()
    return delay

def filter_by_fft(signal,num_of_Fourier_coefficients):
    fft_of_signal = spfft.fft(signal)
    fft_of_signal = list(fft_of_signal)
    fft_of_signal[num_of_Fourier_coefficients:] = (len(fft_of_signal)
            - num_of_Fourier_coefficients)*[0]
    fft_of_signal = int(PADDING/2)*[0] + fft_of_signal + int(PADDING/2)*[0]
    smoothed_signal = np.abs(spfft.ifft(np.array(fft_of_signal)))
    return smoothed_signal

# load flux files
flux_dir = FLUX_DIR
files = glob.glob(flux_dir + os.sep + '*.npy')

# sort flux files ending with e.g. "flux9.npy"
sorted_files = len(files)*['']
for file in files:
    last_six_chars = file[-6:]

    if last_six_chars[0] == 'x':
        file_num = int(last_six_chars[-5:-4])
    else:
        file_num = int(last_six_chars[-6:-4])

    sorted_files[file_num] = file

# create 2D flux matrix
flux_matrix = []
for time in range(len(sorted_files)):
    flux_matrix.append(np.load(sorted_files[time]))
flux_matrix = np.transpose(flux_matrix)

# add noise and change polarity if need be
for point in range(len(flux_matrix)):
    sigma = DEGREE_OF_NOISE * np.max(np.abs(flux_matrix[point]))
    if (np.max(flux_matrix[point]) > 0):
        flux_matrix[point] = (flux_matrix[point]
            + np.random.normal(0,sigma,len(flux_matrix[point])))
    else:
        flux_matrix[point] = (-flux_matrix[point]
            + np.random.normal(0,sigma,len(flux_matrix[point])))

# compute lag
lag_list = []
for ref_point in LIST_OF_REF_POINTS:
    list_of_nums_of_Fourier_coeffs = range(RANGE_OF_FOURIER_COEFFS[0],
                                           RANGE_OF_FOURIER_COEFFS[1])
    for num_of_Fourier_coefficients in list_of_nums_of_Fourier_coeffs:
        lag_sublist = []
        for point in range(0,len(flux_matrix)):
            ref_curve = np.array(filter_by_fft(
                            flux_matrix[len(flux_matrix)-ref_point],
                            num_of_Fourier_coefficients))
            other_curve = np.array(filter_by_fft(
                            flux_matrix[point],
                            num_of_Fourier_coefficients))
            sampling_rate = len(ref_curve)
            lag = lag_finder(ref_curve, other_curve, sampling_rate)
            print(lag)
            lag_sublist.append(lag)
        print('----------------------------------------')
        lag_list.append(lag_sublist)

# wrap periodic data to one interval
wrapped_lag_list = (np.array(lag_list) % 1)
meaned_lag = np.mean(wrapped_lag_list, axis=0)

# plot lag
plt.figure()
#plt.plot(delay_arr, corr)
#plt.title('Lag: ' + str(np.round(delay, 3)) + ' s')
plt.xlabel('Lag')
plt.ylabel('Correlation coeff')
for i in range(len(wrapped_lag_list)):
    plt.plot(wrapped_lag_list[i])
#plt.show()

# plot mean lag
plt.figure()
#plt.plot(delay_arr, corr)
#plt.title('Lag: ' + str(np.round(delay, 3)) + ' s')
plt.xlabel('Lag')
plt.ylabel('Correlation coeff')
plt.plot(meaned_lag)
#plt.show()

# plot flow curves
plt.figure()
plt.xlabel('Lag')
plt.ylabel('Correlation coeff')
for point in range(len(flux_matrix)):
    plt.plot(filter_by_fft(
                            flux_matrix[point],
                            num_of_Fourier_coefficients))
#plt.show()

for timestep in range(len(sorted_files)):
    centerline_file_name = CENTERLINE_FILENAME[:-5] + str(timestep) + ".vtk"
    centerline_file_path = (os.sep).join(list(os.path.split(FLUX_DIR))[:-1]
            + [SUBFOLDER_NAME, centerline_file_name])
    with open(centerline_file_path, "a") as file_object:
        file_object.write("\n")
        file_object.write("TIMESHIFT 1 " + str(len(meaned_lag)) + " double\n")
        file_object.write(" ".join([str(elem) for elem in meaned_lag]))
    
+156 −0
Original line number Original line Diff line number Diff line
import numpy as np
import os
import glob
import matplotlib.pyplot as plt
import scipy.fft as spfft

# constants
FLUX_DIR = (r"C:\Users\galr\Desktop\pacienti_rozpracovani\G318_patient62_bodyPart2_visit0_autom_manual\")
PADDING = 10 #10000
DEGREE_OF_NOISE = 0*0.05 # percent of maximum value
RANGE_OF_FOURIER_COEFFS = [6, 7] # consider 6 to 12 coeffs
LIST_OF_REF_POINTS = [600] # 600
SUBFOLDER_NAME = "integrace_prutok"
CENTERLINE_FILENAME = "centerline_with_data_time=0.vtk"

########################################################################
def read_flux_from_vtk(file_path):
    with open(filepath) as file:
        file_content = file.read()
        print(file_content)

def periodic_corr(x, y):
    """Periodic correlation, implemented using the FFT.

    x and y must be real sequences with the same length.
    """
    return np.fft.ifft(np.fft.fft(x) * np.fft.fft(y).conj()).real

def lag_finder(y1, y2, sr):
    '''Find lag between vector y1 and y2. They
    are sampled with sampling ratio sr.
    '''
    n = len(y1)

    corr = (periodic_corr(y2, y1)
            /(np.sqrt(periodic_corr(y1, y1)[int(n/2)]
            * periodic_corr(y2, y2)[int(n/2)])))

    delay_arr = np.linspace(-0.5*n/sr, 0.5*n/sr, n)
    delay = delay_arr[np.argmax(corr)]

    #plt.figure()
    #plt.plot(delay_arr, corr)
    #plt.title('Lag: ' + str(np.round(delay, 3)) + ' s')
    #plt.xlabel('Lag')
    #plt.ylabel('Correlation coeff')
    #plt.plot(y2)
    #plt.plot(y1)
    #plt.show()
    return delay

def filter_by_fft(signal,num_of_Fourier_coefficients):
    fft_of_signal = spfft.fft(signal)
    fft_of_signal = list(fft_of_signal)
    fft_of_signal[num_of_Fourier_coefficients:] = (len(fft_of_signal)
            - num_of_Fourier_coefficients)*[0]
    fft_of_signal = int(PADDING/2)*[0] + fft_of_signal + int(PADDING/2)*[0]
    smoothed_signal = np.abs(spfft.ifft(np.array(fft_of_signal)))
    return smoothed_signal

# load flux files
files = glob.glob(FLUX_DIR + os.sep + SUBFOLDER_NAME + CENTERLINE_FILENAME[:-5] + '*.vtk')

# sort flux files ending with e.g. "flux9.npy"
sorted_files = len(files)*['']
for file in files:
    last_six_chars = file[-6:]

    if not last_six_chars[0].isdigit():
        file_num = int(last_six_chars[-5:-4])
    else:
        file_num = int(last_six_chars[-6:-4])

    sorted_files[file_num] = file

# create 2D flux matrix
flux_matrix = []
for time in range(len(sorted_files)):
    flux_matrix.append(np.load(sorted_files[time]))
flux_matrix = np.transpose(flux_matrix)

# add noise and change polarity if need be
for point in range(len(flux_matrix)):
    sigma = DEGREE_OF_NOISE * np.max(np.abs(flux_matrix[point]))
    if (np.max(flux_matrix[point]) > 0):
        flux_matrix[point] = (flux_matrix[point]
            + np.random.normal(0,sigma,len(flux_matrix[point])))
    else:
        flux_matrix[point] = (-flux_matrix[point]
            + np.random.normal(0,sigma,len(flux_matrix[point])))

# compute lag
lag_list = []
for ref_point in LIST_OF_REF_POINTS:
    list_of_nums_of_Fourier_coeffs = range(RANGE_OF_FOURIER_COEFFS[0],
                                           RANGE_OF_FOURIER_COEFFS[1])
    for num_of_Fourier_coefficients in list_of_nums_of_Fourier_coeffs:
        lag_sublist = []
        for point in range(0,len(flux_matrix)):
            ref_curve = np.array(filter_by_fft(
                            flux_matrix[len(flux_matrix)-ref_point],
                            num_of_Fourier_coefficients))
            other_curve = np.array(filter_by_fft(
                            flux_matrix[point],
                            num_of_Fourier_coefficients))
            sampling_rate = len(ref_curve)
            lag = lag_finder(ref_curve, other_curve, sampling_rate)
            print(lag)
            lag_sublist.append(lag)
        print('----------------------------------------')
        lag_list.append(lag_sublist)

# wrap periodic data to one interval
wrapped_lag_list = (np.array(lag_list) % 1)
meaned_lag = np.mean(wrapped_lag_list, axis=0)

# plot lag
plt.figure()
#plt.plot(delay_arr, corr)
#plt.title('Lag: ' + str(np.round(delay, 3)) + ' s')
plt.xlabel('Lag')
plt.ylabel('Correlation coeff')
for i in range(len(wrapped_lag_list)):
    plt.plot(wrapped_lag_list[i])
#plt.show()

# plot mean lag
plt.figure()
#plt.plot(delay_arr, corr)
#plt.title('Lag: ' + str(np.round(delay, 3)) + ' s')
plt.xlabel('Lag')
plt.ylabel('Correlation coeff')
plt.plot(meaned_lag)
#plt.show()

# plot flow curves
plt.figure()
plt.xlabel('Lag')
plt.ylabel('Correlation coeff')
for point in range(len(flux_matrix)):
    plt.plot(filter_by_fft(
                            flux_matrix[point],
                            num_of_Fourier_coefficients))
#plt.show()



#for timestep in range(len(sorted_files)):
#    centerline_file_name = CENTERLINE_FILENAME[:-5] + str(timestep) + ".vtk"
#    centerline_file_path = (os.sep).join(list(os.path.split(FLUX_DIR))[:-1]
#            + [SUBFOLDER_NAME, centerline_file_name])
#    with open(centerline_file_path, "a") as file_object:
#        file_object.write("\n")
#        file_object.write("TIMESHIFT 1 " + str(len(meaned_lag)) + " double\n")
#        file_object.write(" ".join([str(elem) for elem in meaned_lag]))
    
+245 −0
Original line number Original line Diff line number Diff line
import numpy as np
import os
import glob
import matplotlib.pyplot as plt
import scipy.fft as spfft
import scipy.signal as spsgn
import sys
from scipy.interpolate import splrep, splev
#import pelt

# constants
FLUX_DIR = (r"C:\Users\galr\Desktop\pacienti_rozpracovani\G318_patient62_bodyPart2_visit0_autom_manual")
PADDING = 10000 #10000
DEGREE_OF_NOISE = 0*0.05 # percent of maximum value
RANGE_OF_FOURIER_COEFFS = [6, 7] # consider 6 to 12 coeffs
LIST_OF_REF_POINTS = [400] # 600
SUBFOLDER_NAME = "integrace_prutok"
CENTERLINE_FILENAME = "centerline_with_data_time=0.vtk"

def load_data(flux_dir, subfolder_name, centerline_filename, quantity):
    files = glob.glob(FLUX_DIR + os.sep + SUBFOLDER_NAME + os.sep 
            + CENTERLINE_FILENAME[:-5] + '*' + CENTERLINE_FILENAME[-4:])
    sorted_files = sort_files(files)
    flux_matrix = []
    for file in sorted_files:
        flux_matrix.append(extract_quantity_from_textfile(
                read_vtk(file),quantity))
    return np.transpose(flux_matrix)

def sort_files(files):
    sorted_files = len(files)*['']
    for file in files:
        last_six_chars = file[-6:]
        if not last_six_chars[0].isdigit():
            file_num = int(last_six_chars[-5:-4])
        else:
            file_num = int(last_six_chars[-6:-4])
        sorted_files[file_num] = file
    return sorted_files

def read_vtk(file_path):
    with open(file_path) as file:
        file_content = file.read()
        return file_content

def extract_quantity_from_textfile(file_content,quantity):
    quant_idx = file_content.find(quantity)
    quant_data_start_idx = file_content[quant_idx:].find("\n") + 1
    for char_idx, char in enumerate(file_content[quant_idx + quant_data_start_idx:]):
        if not char.isalpha():
            pass
        else:
            break
    quant_data_end_idx = file_content[quant_idx +
            quant_data_start_idx:].find(char)
    quant_data = file_content[quant_idx + quant_data_start_idx
                             :quant_idx + quant_data_start_idx
                                  + quant_data_end_idx]
    quant_data = quant_data.split()
    quant_data = list(map(float, quant_data))
    return quant_data

def split_by_secID(secID_matrix, data_matrix):
    new_data_matrix = []
    current_section_list = []
    section_ID_num = 0
    for i in range(len(data_matrix)):
        if not (secID_matrix[i][0] == section_ID_num):
            new_data_matrix.append(current_section_list)
            section_ID_num += 1
            current_section_list = []
        current_section_list.append(data_matrix[i])
    new_data_matrix.append(current_section_list)
    return new_data_matrix

def compute_correlation(flux_matrix, padding, degree_of_noise,
                        range_of_fourier_coeffs, list_of_ref_points):
    flux_matrix = add_noise_and_change_polarity(flux_matrix, degree_of_noise)
    lag_list = []
    for ref_point in list_of_ref_points:
        print("Ref. point", ref_point)
        list_of_nums_of_Fourier_coeffs = range(range_of_fourier_coeffs[0],
                                               range_of_fourier_coeffs[1])
        for num_of_Fourier_coefficients in list_of_nums_of_Fourier_coeffs:
            print("FC:", num_of_Fourier_coefficients)
            lag_sublist = []
            if (len(flux_matrix)-ref_point <= 0):
                print("Ref. index too high.")
                sys.exit()
            for point in range(0,len(flux_matrix)):
                ref_curve = np.array(filter_by_fft(
                                flux_matrix[ref_point],
                                num_of_Fourier_coefficients, padding))
                other_curve = np.array(filter_by_fft(
                                flux_matrix[point],
                                num_of_Fourier_coefficients, padding))
                sampling_rate = len(ref_curve)
                lag = lag_finder(ref_curve, other_curve, sampling_rate)
                lag_sublist.append(lag)
            lag_list.append(lag_sublist)
    wrapped_lag = wrap_lag(lag_list)
    #print(wrapped_lag)
    return wrapped_lag

def wrap_lag(lag_list):
    wrapped_lag_list = ((np.array(lag_list) % 1)-0.5)
    meaned_lag = np.mean(wrapped_lag_list, axis=0)
    return meaned_lag

def periodic_corr(x, y):
    """Periodic correlation, implemented using the FFT.

    x and y must be real sequences with the same length.
    """
    return np.fft.ifft(np.fft.fft(x) * np.fft.fft(y).conj()).real

def lag_finder(y1, y2, sr):
    '''Find lag between vector y1 and y2. They
    are sampled with sampling ratio sr.
    '''
    n = len(y1)

    if (periodic_corr(y1, y1)[int(n/2)] == 0 or
        periodic_corr(y2, y2)[int(n/2)] == 0 or
        periodic_corr(y2, y1)[int(n/2)] == 0):
        delay = 0
    else:
        corr = (periodic_corr(y2, y1)
                /(np.sqrt(periodic_corr(y1, y1)[int(n/2)]
                * periodic_corr(y2, y2)[int(n/2)])))
        delay_arr = np.linspace(-0.5*n/sr, 0.5*n/sr, n)
        delay = delay_arr[np.argmax(corr)]
    #print(delay)

    #plt.figure()
    #plt.plot(delay_arr, corr)
    #plt.title('Lag: ' + str(np.round(delay, 3)) + ' s')
    #plt.xlabel('Lag')
    #plt.ylabel('Correlation coeff')
    #plt.plot(y2)
    #plt.plot(y1)
    #plt.show()
    return delay

def filter_by_fft(signal,num_of_Fourier_coefficients, padding):
    fft_of_signal = spfft.fft(signal)
    fft_of_signal = list(fft_of_signal)
    fft_of_signal[num_of_Fourier_coefficients:] = (len(fft_of_signal)
            - num_of_Fourier_coefficients)*[0]
    fft_of_signal = int(padding/2)*[0] + fft_of_signal + int(padding/2)*[0]
    smoothed_signal = np.abs(spfft.ifft(np.array(fft_of_signal)))
    return smoothed_signal

def add_noise_and_change_polarity(flux_matrix, degree_of_noise):
    print(flux_matrix)
    for point in range(len(flux_matrix)):
        sigma = degree_of_noise * np.max(np.abs(flux_matrix[point]))
        if (np.max(flux_matrix[point]) > 0):
            flux_matrix[point] = (flux_matrix[point]
                + np.random.normal(0,sigma,len(flux_matrix[point])))
        else:
            flux_matrix[point] = (-flux_matrix[point]
                + np.random.normal(0,sigma,len(flux_matrix[point])))
    return flux_matrix

def smooth_lag(lag):
    print(len(lag))
    lag = list(lag)
    num_of_nonzeros = 20
    fft_of_lag = list((spfft.fft(lag)))
    factor = (len(lag) - num_of_nonzeros -1)
    fft_of_lag[num_of_nonzeros + 1 :] = (len(lag) - num_of_nonzeros -1)*[0]
    lag = np.array(spfft.ifft((fft_of_lag)))
    print(factor)
    print(len(lag))
    return lag
    
    
def save_data():
    print("save")

def main():
    flux_matrix = load_data(FLUX_DIR, SUBFOLDER_NAME, CENTERLINE_FILENAME,"Flux")
    dist_matrix = load_data(FLUX_DIR, SUBFOLDER_NAME, CENTERLINE_FILENAME,"Distance")
    secID_matrix = load_data(FLUX_DIR, SUBFOLDER_NAME, CENTERLINE_FILENAME,"SectionID")
    wrapped_lag = compute_correlation(flux_matrix, PADDING, DEGREE_OF_NOISE,
            RANGE_OF_FOURIER_COEFFS, LIST_OF_REF_POINTS)
    wrapped_lag_split = split_by_secID(secID_matrix, wrapped_lag)
    dist_matrix_split = split_by_secID(secID_matrix, dist_matrix)

    #pelt.plot_segments(wrapped_lag,pelt.backtracking(pelt.pelt(wrapped_lag)))
    for i in range(len(wrapped_lag_split)):
        x = np.arange(0,len(wrapped_lag_split[i]))
        x2 = dist_matrix_split[i]
        noisy_data = wrapped_lag_split[i]
        f = splrep(x,noisy_data,k=5,s=0.1)
        #plt.plot(x, data, label="raw data")
        #plt.plot(x, noise, label="noise")
        #plt.plot(x, noisy_data, label="noisy data")
        #plt.plot(x, splev(x,f), label="fitted")
        #plt.plot(dist_matrix_split[i], splev(x,f,der=1)/10)

        print(np.shape(splev(x,f)),np.shape(np.gradient(x2)))
        plt.plot(x2, splev(x,f))
        
        plt.plot(x2, noisy_data)
        x3 = np.gradient(x2)
        x4 = np.transpose(x3,(0,2,1))[0][0][:]
        print(np.shape(x4),np.shape(np.transpose(x2)))
        x5 = np.transpose(x2)[0]
        pwv = 1/splev(x,f,der=1)
        plt.plot(x5,pwv)
    #plt.plot(np.transpose(dist_matrix)[0], wrapped_lag)    
    plt.show
    
    #plt.plot(1/(splev(x,f,der=1)/10/np.gradient(np.transpose(dist_matrix)[0])), label="PWV")
    #plt.plot(x, splev(x,f,der=2)/100, label="2nd derivative")
    plt.hlines(0,0,2)
    plt.legend(loc=0)
    plt.show()

    #plt.plot(np.transpose(dist_matrix)[0],splev(x,f), label="fit on dist")
    #plt.plot(x, splev(x,f,der=2)/100, label="2nd derivative")
    plt.hlines(0,0,2)
    plt.legend(loc=0)
    plt.show()

    #pelt.plot_segments(wrapped_lag,pelt.backtracking(pelt.pelt(wrapped_lag)))
    x = np.arange(0,len(wrapped_lag))
    noisy_data = wrapped_lag
    f = splrep(x,noisy_data,k=5,s=0.1)
    #plt.plot(x, data, label="raw data")
    #plt.plot(x, noise, label="noise")
    plt.plot(x, noisy_data, label="noisy data")
    plt.plot(x, splev(x,f), label="fitted")
    plt.plot(x, splev(x,f,der=1)/10, label="1st derivative")
    #plt.plot(x, splev(x,f,der=2)/100, label="2nd derivative")
    plt.hlines(0,0,2)
    plt.legend(loc=0)
    plt.show()

    

if __name__ == "__main__":
    main()
+2.59 KiB

File added.

No diff preview for this file type.

+128 −0
Original line number Original line Diff line number Diff line
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

def pelt(data, **kwargs):
    
    # Pre-processing
    df = pd.DataFrame(data)
    df['squared'] = np.square(df[0])
    df['cumsum'] = np.cumsum(df[0], axis=0)
    df['cumsumsquared'] = np.cumsum(df['squared'], axis=0)
    df['diviseur'] = [x for x in range(1,len(df)+1)]
    df['mean'] = df['cumsum'] / df['diviseur']
    df['meansquared'] = np.square(df['mean'])
    df = df.append({
        0:0,
        'cumsum':0,
        'cumsumsquared':0,
        'diviseur':0,
        'mean':0,
        'meansquared':0,
        'squared':0}, ignore_index=True)
    
    # Penalty
    if 'penalty' in kwargs:
        B = kwargs['penalty']
    else:
        B = 2 * np.log(len(data))
    
    # Initilization
    Q = [-B] # Actual cost
    CP = [-1] # Last segment position
    T = [x for x in range(0,len(data))] # Authorized positions

    # Parse the data
    for pos in range(0,len(data)):
        
        costs = []
        min_cost_val_temp = float("inf")
        min_cost_pos_temp = -1
        
        # Parse all the Yi:pos that are still available
        for i in T:
            if i > pos:
                break

            # Square sum minus N times the square mean
            sos = df['cumsumsquared'].iloc[pos] - df['cumsumsquared'].iloc[i-1]
            n = pos - i + 1
            ms = (data[i:pos+1].mean())**2
            C = sos - (n*ms)

            # Cost test
            temp_cost = Q[i] + C + B
            if min_cost_val_temp > temp_cost:
                min_cost_val_temp = temp_cost
                min_cost_val_pos = i

        # Push the smallest cost
        Q.append(min_cost_val_temp)

        # Push the position
        CP.append(min_cost_val_pos)
        
        # Prunning
        for i in T:
            if i >= pos:
                break
            
            iplusone = i+1
            # Square sum minus N times the square mean
            sos = df['cumsumsquared'].iloc[pos] - df['cumsumsquared'].iloc[iplusone-1]
            n = pos - iplusone + 1
            ms = (data[iplusone:pos+1].mean())**2
            C = sos - (n*ms)
                
            if (Q[i] + C > Q[pos]):
                T.remove(i)
                
    return CP

def backtracking(CP):
    """ Apply backtracking to a CP vector from OP algorithm. Returns a "segments" vector.
    Args:
     CP: array-like 1 dimension.
    """

    # Data length
    n = len(CP)-1

    # Initialization
    segments = []
    changepoint = CP[n]

    # While the changepoint doesn't return the first point
    while changepoint > 0:

        segments.append(changepoint-1)
        changepoint = CP[changepoint]

    # The new vector was built with .append(), but since we parse from the end to the beginning,
    # We need to reverse it.
    segments.reverse()
    
    return segments

def plot_segments(data, segments, ylim=False):
    """ Plot segments generated by the OP & backtracking algorithms.
    Args:
        data: the data used to fit the model.
        segments: the segments returned by backtracking().
    """
    
    fig, ax = plt.subplots(figsize=(15,5))
    start = 0

    for end in segments:
        mean = data[start:end+1].sum() / len(data[start:end+1])
        plt.plot((start, end), (mean, mean))
        start = end+1

    end = len(data)-1
    mean = data[start:end+1].sum() / len(data[start:end+1])
    plt.plot((start, end), (mean, mean))
    
    if ylim != False:
        plt.ylim(ylim)
    plt.show()
Loading