Commit 33510b6e authored by Jan Kovář's avatar Jan Kovář
Browse files

Weights added to final heatmap + different types of batch GD created

parent 411e15f0
Loading
Loading
Loading
Loading
+28 −15
Original line number Diff line number Diff line
@@ -17,15 +17,22 @@ class Data:
    input_coord_dimension = 2
    output_data_dimension = 1
    data_size = 1
    training_data_size = 1
    all_data = []

    training_data_size = 1
    training_indices = []
    training_data = []
    tagged_data = []
    labelled_data = []

    test_data_size = 1
    test_indices = []
    test_data = []


    def __init__(self, data_size, training_data_size):
        self.data_size = data_size
        self.training_data_size = training_data_size
        self.test_data_size = data_size - training_data_size

    def norm(self,x,y):
        """Return the Euclidean length of the vector (x, y).

        The arithmetic is elementwise, so x and y may be scalars or numpy arrays.
        """
        sum_of_squares = x**2 + y**2
        return np.sqrt(sum_of_squares)
@@ -66,14 +73,20 @@ class Data:
        
        

    def chooseTrainingData(self):
    def splitDataToTrainingTest(self):
        """Randomly partition the rows of all_data into a training and a test set.

        training_data_size distinct row indices are drawn without replacement
        for training; every remaining row index, in ascending order, goes to
        the test set. The index collections and the selected rows are stored
        on the instance.
        """
        every_index = range(self.data_size)

        chosen = random.sample(every_index, self.training_data_size)
        self.training_indices = chosen
        self.training_data = self.all_data[chosen, :]

        # Whatever was not drawn for training is used for testing.
        remaining = np.delete(every_index, chosen, axis=0)
        self.test_indices = remaining
        self.test_data = self.all_data[remaining, :]

        # Sanity check: report when the test set has the expected size.
        if len(self.test_data) == self.test_data_size:
            print("OK")

    def initializeDataSet(self, type):
        self.createAllData(type)
        self.addAdditionalInputs()
        self.chooseTrainingData()
        self.splitDataToTrainingTest()

    def createLinear(self):
        self.all_data = 2*np.random.rand(self.data_size,self.input_coord_dimension)-np.ones((self.data_size,self.input_coord_dimension))
@@ -82,7 +95,7 @@ class Data:
                self.all_data[i][1] = self.all_data[i][1]
            else:
                self.all_data[i][1] = self.all_data[i][1]
        self.tagged_data = 2*(self.all_data[:,1] < 0)-1
        self.labelled_data = 2*(self.all_data[:,1] < 0)-1

    def createElipse(self):
        self.all_data = 2*np.random.rand(self.data_size,self.input_coord_dimension)-np.ones((self.data_size,self.input_coord_dimension))
@@ -90,7 +103,7 @@ class Data:
            if self.norm((self.all_data[i][0])/1, (self.all_data[i][1])/1) < 0.7:
                self.all_data[i][0] = 0.5*self.all_data[i][0]
                self.all_data[i][1] = 0.5*self.all_data[i][1]
        self.tagged_data = 2*(self.norm((self.all_data[:,0]/1), (self.all_data[:,1])/1) < 0.7)-1
        self.labelled_data = 2*(self.norm((self.all_data[:,0]/1), (self.all_data[:,1])/1) < 0.7)-1
    
    def createSinus(self):
        self.all_data = 2*np.random.rand(self.data_size,self.input_coord_dimension)-np.ones((self.data_size,self.input_coord_dimension))
@@ -99,7 +112,7 @@ class Data:
                self.all_data[i][1] -= 0.0
            else:
                self.all_data[i][1] += 0.0
        self.tagged_data = 2*(self.all_data[:,1] < 0.5*np.sin(4*self.all_data[:,0]))-1
        self.labelled_data = 2*(self.all_data[:,1] < 0.5*np.sin(4*self.all_data[:,0]))-1

    def createHalfElipse(self):
        self.all_data = 2*np.random.rand(self.data_size,self.input_coord_dimension)-np.ones((self.data_size,self.input_coord_dimension))
@@ -113,7 +126,7 @@ class Data:
                    self.all_data[i][1] += 0.2
                else:
                    self.all_data[i][1] = np.maximum(-1, self.all_data[i][1]-0.1)
        self.tagged_data = 2*(self.norm(self.all_data[:,0], self.all_data[:,1]+1.2) < 1.2)-1
        self.labelled_data = 2*(self.norm(self.all_data[:,0], self.all_data[:,1]+1.2) < 1.2)-1

    def createSpiral(self):
        # Define the center of the spiral, as well as the starting angle and the distance between the lines
@@ -125,7 +138,7 @@ class Data:

        # Create an empty list to store the points of the spiral
        self.all_data = np.zeros((self.data_size,2))
        self.tagged_data = np.zeros(self.data_size)
        self.labelled_data = np.zeros(self.data_size)

        # Create a loop to generate the points of the spiral
        for i in range(math.floor(self.data_size/2)):
@@ -138,9 +151,9 @@ class Data:
            
            # Add the point to the list of points
            self.all_data[2*i] = [x,y]
            self.tagged_data[2*i] = -1
            self.labelled_data[2*i] = -1
            self.all_data[2*i+1] = [x_2,y_2]
            self.tagged_data[2*i+1] = 1
            self.labelled_data[2*i+1] = 1
            
            # Increase the angle and distance for the next iteration
            angle += 6*math.pi/self.data_size
@@ -148,7 +161,7 @@ class Data:

    def createTwoElipses(self):
        self.all_data = 2*np.random.rand(self.data_size,self.input_coord_dimension)-np.ones((self.data_size,self.input_coord_dimension))
        self.tagged_data = np.zeros(self.data_size)
        self.labelled_data = np.zeros(self.data_size)

        center_x1 = 0.4
        center_y1 = 0.4
@@ -161,10 +174,10 @@ class Data:
            if self.norm((self.all_data[i][0]-center_x1)/1, (self.all_data[i][1]-center_y1)/1) < range1:
                self.all_data[i][0] = 0.8*(self.all_data[i][0]-center_x1) + center_x1
                self.all_data[i][1] = 0.8*(self.all_data[i][1]-center_y1) + center_y1
                self.tagged_data[i] = 1
                self.labelled_data[i] = 1
            if self.norm((self.all_data[i][0]-center_x2)/1, (self.all_data[i][1]-center_y2)/1) < range2:
                self.all_data[i][0] = 0.8*(self.all_data[i][0]-center_x2) + center_x2
                self.all_data[i][1] = 0.8*(self.all_data[i][1]-center_y2) + center_y2
                self.tagged_data[i] = 1
        self.tagged_data = 2*self.tagged_data-1
                self.labelled_data[i] = 1
        self.labelled_data = 2*self.labelled_data-1
+148 −73
Original line number Diff line number Diff line
@@ -4,32 +4,94 @@ from Data import Data
import matplotlib as mpl
import matplotlib.pyplot as plt
import sys 
import os
import random
from matplotlib.patches import ConnectionPatch


class LearningController:
class TrainingController:
    folder_path = "model/"
    learning_rate = 0.1
    l2_regularization = 0.0
    batch_size = 30
    max_epochs = 2000
    max_epochs = 1000
    train_loss = 1.0
    train_loss_baseline = 1.0
    test_loss = 1.0
    test_loss_baseline = 1.0
    total_loss = 1.0
    total_loss_baseline = 1.0
    
    train_loss_history = []
    test_loss_history = []

    def __init__(self, network):
        self.network = network
        network.setLearningParameters(self.learning_rate, self.l2_regularization)
        self.network.setLearningParameters(self.learning_rate, self.l2_regularization)
        
    def trainOnBatch(self, batch, tags, iterationsOnOneBatch):
        batch_size = len(batch)
        for n in range(iterationsOnOneBatch):
            self.network.resetBeforeBatchLearning()
            for j in range(batch_size):
    def trainOnBatch(self, batch, labels):
        for j in range(len(batch)):
            self.network.activation(batch[j])
                #print("Po aktivaci:\n")
                #network.printNetwork()
                self.network.calculateErrors(tags[j])
                self.network.updateGradient(batch_size)
            self.network.calculateErrors(labels[j])
            self.network.updateGradient(len(batch))
    
    def trainOneEpochBGD(self, dataset: "Data"):
        """Run one epoch of batch gradient descent over the training split.

        The gradient is accumulated over every training sample (processed in
        shuffled chunks of at most batch_size rows) and the weights are updated
        once, at the end of the epoch.

        dataset: object exposing all_data, labelled_data and training_indices.
        """
        self.network.resetBeforeEpoch()

        # Shuffle a copy of the training row indices. Indexing all_data with the
        # positions 0..training_data_size-1 would pick whichever rows come first
        # in all_data, including rows that belong to the test split.
        shuffled_indexes = np.array(dataset.training_indices, dtype=int)
        np.random.shuffle(shuffled_indexes)

        # Stepping by batch_size never produces an empty trailing chunk, even
        # when the training set size is an exact multiple of batch_size.
        for start in range(0, len(shuffled_indexes), self.batch_size):
            batch_indexes = shuffled_indexes[start:start + self.batch_size]
            batch = dataset.all_data[batch_indexes, :]
            labels = dataset.labelled_data[batch_indexes]
            self.trainOnBatch(batch, labels)

        # NOTE(review): trainOnBatch scales each sample by the chunk length, so
        # the accumulated gradient is the sum of the per-chunk means rather than
        # the mean over the whole training set - confirm this is intended.
        self.network.updateWeights()
        
    def trainOneEpochMiniBGD(self, dataset: "Data"):
        """Run one epoch of mini-batch gradient descent over the training split.

        The training rows are shuffled and cut into chunks of at most
        batch_size rows; for each chunk the gradient is reset, accumulated and
        applied to the weights.

        dataset: object exposing all_data, labelled_data and training_indices.
        """
        # Shuffle a copy of the training row indices. Indexing all_data with the
        # positions 0..training_data_size-1 would pick whichever rows come first
        # in all_data, including rows that belong to the test split.
        shuffled_indexes = np.array(dataset.training_indices, dtype=int)
        np.random.shuffle(shuffled_indexes)

        # Stepping by batch_size never produces an empty trailing chunk, so no
        # weight update is performed on a batch without samples.
        for start in range(0, len(shuffled_indexes), self.batch_size):
            batch_indexes = shuffled_indexes[start:start + self.batch_size]
            batch = dataset.all_data[batch_indexes, :]
            labels = dataset.labelled_data[batch_indexes]

            # Despite its name, resetBeforeEpoch is used here per chunk so that
            # each weight update sees only the current chunk's gradient.
            self.network.resetBeforeEpoch()
            self.trainOnBatch(batch, labels)
            self.network.updateWeights()

    def trainOneEpochSGD(self, dataset: "Data", multiple_passes):
        """Draw one random mini-batch from the training split and train on it repeatedly.

        dataset: object exposing all_data, labelled_data and training_indices.
        multiple_passes: number of consecutive gradient steps taken on the
            drawn batch; each step resets the gradient, accumulates it over the
            batch and updates the weights.
        """
        training_indices = list(dataset.training_indices)

        # Use this controller's own batch size (not a module-level instance) and
        # never request more rows than the training split contains.
        sample_size = min(self.batch_size, len(training_indices))
        batch_indexes = random.sample(training_indices, sample_size)

        batch = dataset.all_data[batch_indexes, :]
        labels = dataset.labelled_data[batch_indexes]

        for _ in range(multiple_passes):
            self.network.resetBeforeEpoch()
            self.trainOnBatch(batch, labels)
            self.network.updateWeights()

    def trainOnDataset(self,dataset,number_of_epochs, train_epoch=None):
        """Train the network on dataset for a fixed number of epochs.

        dataset: the dataset handed to the per-epoch training routine.
        number_of_epochs: how many times the per-epoch routine is invoked.
        train_epoch: optional callable taking the dataset and performing one
            epoch; defaults to self.trainOneEpochMiniBGD. The class defines no
            trainOneEpoch method, so calling it would raise AttributeError.
        """
        if train_epoch is None:
            train_epoch = self.trainOneEpochMiniBGD

        for _ in range(number_of_epochs):
            train_epoch(dataset)
            # TODO: update the plots / progress graphics after each epoch
    
    def saveTrainedModel(self):
        """Placeholder for persisting the trained model; currently does nothing."""
        # TODO: save plots, history in txt, weights in txt and numpy for network loading
        return None


data_size = 1000
@@ -40,87 +102,75 @@ if(data.initializeDataSet("spiral") == False):
    print("Dataset not defined")
    sys.exit(1)

structure = [data.input_data_dimension,8,4,4,2,data.output_data_dimension]
structure = [data.input_data_dimension,8,8,8,data.output_data_dimension]
network = NeuralNetwork(structure)

learning_controller = LearningController(network)

plt.ion()
fig, (tag,cost) = plt.subplots(1,2, figsize=(10, 5))
training_controller = TrainingController(network)

os.makedirs(training_controller.folder_path, exist_ok=True)

costFunction_training = []
costFunction_testing = []
plt.ion()
fig, (state,cost) = plt.subplots(1,2, figsize=(10, 5))

test_points_x = np.linspace(-1,1,101)
test_points_y = np.linspace(1,-1,101)

tagged_data_A = np.zeros((len(data.all_data), data.input_data_dimension))
tagged_data_B = np.zeros((len(data.all_data), data.input_data_dimension))
labelled_data_A = np.zeros((data_size, data.input_data_dimension))
labelled_data_B = np.zeros((data_size, data.input_data_dimension))

for k in range(data_size):
        if data.tagged_data[k] > 0:
            tagged_data_A[k] = data.all_data[k]
        if data.labelled_data[k] > 0:
            labelled_data_A[k] = data.all_data[k]
        else:
            tagged_data_B[k] = data.all_data[k]
            labelled_data_B[k] = data.all_data[k]

tagged_data_A = tagged_data_A[~np.all(tagged_data_A == 0, axis=1)]
tagged_data_B = tagged_data_B[~np.all(tagged_data_B == 0, axis=1)]
labelled_data_A = labelled_data_A[~np.all(labelled_data_A == 0, axis=1)]
labelled_data_B = labelled_data_B[~np.all(labelled_data_B == 0, axis=1)]

epoch = 0
epsilon = 0.01
epsilon = 0.05
costFunction = 1.0
costFunction_training_value = 1.0

while epoch < learning_controller.max_epochs and costFunction_training_value >= epsilon:
while epoch < training_controller.max_epochs and training_controller.train_loss >= epsilon:
    print("Epoch "+str(epoch)+":")

    batch_indexes = random.sample(range(data.training_data_size), learning_controller.batch_size)#np.random.randint(0, training_data_size, batch_size)
    batch_indexes = random.sample(data.training_indices, learning_controller.batch_size)
    batch = data.all_data[batch_indexes,:]
    
    tags = data.tagged_data[batch_indexes]

    learning_controller.trainOnBatch(batch, tags, 10)

    network.updateCostFunction(data.all_data[data.training_indices], data.tagged_data[data.training_indices])

    if epoch == 0:
        cost_function_training_baseline = network.cost_function

    costFunction_training.append((epoch, network.cost_function/cost_function_training_baseline))

    training_value =  network.cost_function
    training_controller.trainOneEpochSGD(data,10)
    #training_controller.trainOneEpochMiniBGD(data)

    network.updateCostFunction(np.delete(data.all_data, data.training_indices, axis=0), np.delete(data.tagged_data, data.training_indices))

    training_controller.train_loss = training_controller.network.updateLossFunction(data.all_data[data.training_indices], data.labelled_data[data.training_indices])
    training_controller.test_loss = training_controller.network.updateLossFunction(data.all_data[data.test_indices], data.labelled_data[data.test_indices])
    training_controller.total_loss = training_controller.train_loss+training_controller.test_loss
    if epoch == 0:
        cost_function_testing_baseline = network.cost_function - cost_function_training_baseline
        training_controller.train_loss_baseline = training_controller.train_loss
        training_controller.test_loss_baseline = training_controller.test_loss
        training_controller.total_loss_baseline = training_controller.train_loss_baseline + training_controller.test_loss_baseline

    cost_function_baseline = cost_function_training_baseline + cost_function_testing_baseline
    training_controller.train_loss /= training_controller.train_loss_baseline
    training_controller.test_loss /= training_controller.test_loss_baseline
    training_controller.total_loss /= training_controller.total_loss_baseline

    costFunction_testing.append((epoch, (network.cost_function - training_value)/cost_function_testing_baseline))
    training_controller.train_loss_history.append((epoch, training_controller.train_loss))
    training_controller.test_loss_history.append((epoch, (training_controller.test_loss)))

    print("Cost function: "+str(network.cost_function/cost_function_baseline))
    print("Training cost function: "+str(training_value/cost_function_training_baseline))
    print("Cost function: "+str(training_controller.total_loss))
    print("Training cost function: "+str(training_controller.train_loss))

    costFunction = network.cost_function/cost_function_baseline
    costFunction_training_value = training_value/cost_function_training_baseline

    cost.plot(*zip(*costFunction_training), 'k-', label='Training data')
    cost.plot(*zip(*costFunction_testing), 'b-', label='Testing data')
    cost.plot(*zip(*training_controller.train_loss_history), 'k-', label='Training data')
    cost.plot(*zip(*training_controller.test_loss_history), 'b-', label='Testing data')

    if epoch % 20 == 0:
        colormap = network.computeHeatMap(test_points_x, test_points_y)
        colormap = training_controller.network.computeHeatMap(test_points_x, test_points_y)

        #heatmap = proc.imshow(colormap, cmap='RdYlGn', extent=([-1, 1, -1, 1]), interpolation='bilinear', vmin=-1, vmax=1)
        tag.imshow(colormap, cmap='RdYlGn', extent=([-1, 1, -1, 1]), interpolation='bilinear', vmin=-1, vmax=1)
        tag.plot(tagged_data_A[:,0], tagged_data_A[:,1], 'go', label="Class A")
        tag.plot(tagged_data_B[:,0], tagged_data_B[:,1], 'ro', label="Class B")
        tag.plot(data.training_data[:,0], data.training_data[:,1], 'k.', label="Training data")
        state.imshow(colormap, cmap='RdYlGn', extent=([-1, 1, -1, 1]), interpolation='bilinear', vmin=-1, vmax=1)
        state.plot(labelled_data_A[:,0], labelled_data_A[:,1], 'go', label="Class A")
        state.plot(labelled_data_B[:,0], labelled_data_B[:,1], 'ro', label="Class B")
        state.plot(data.training_data[:,0], data.training_data[:,1], 'k.', label="Training data")
        
        if(epoch == 0):
            tag.legend(loc='lower center', bbox_to_anchor=(0.5, 1.0), ncols=3)
            state.legend(loc='lower center', bbox_to_anchor=(0.5, 1.0), ncols=3)
            #fig.colorbar(heatmap, orientation="horizontal")
            cost.legend(loc='lower center', bbox_to_anchor=(0.5, 1.0), ncols=2)

@@ -129,32 +179,57 @@ while epoch < learning_controller.max_epochs and costFunction_training_value >=
    
    epoch += 1
        
colormap = network.computeHeatMap(test_points_x, test_points_y)
colormap = training_controller.network.computeHeatMap(test_points_x, test_points_y)

#heatmap = proc.imshow(colormap, cmap='RdYlGn', extent=([-1, 1, -1, 1]), interpolation='bilinear', vmin=-1, vmax=1)
tag.imshow(colormap, cmap='RdYlGn', extent=([-1, 1, -1, 1]), interpolation='bilinear', vmin=-1, vmax=1)
tag.plot(tagged_data_A[:,0], tagged_data_A[:,1], 'go', label="Class A")
tag.plot(tagged_data_B[:,0], tagged_data_B[:,1], 'ro', label="Class B")
tag.plot(data.training_data[:,0], data.training_data[:,1], 'k.', label="Training data")
state.imshow(colormap, cmap='RdYlGn', extent=([-1, 1, -1, 1]), interpolation='bilinear', vmin=-1, vmax=1)
state.plot(labelled_data_A[:,0], labelled_data_A[:,1], 'go', label="Class A")
state.plot(labelled_data_B[:,0], labelled_data_B[:,1], 'ro', label="Class B")
state.plot(data.training_data[:,0], data.training_data[:,1], 'k.', label="Training data")

numberOfNeurons = np.sum(structure)
maxNeuronsInLayer = np.max(structure)

networkMap = network.computeWholeNeuralNetworkHeatMap(test_points_x, test_points_y, len(structure), maxNeuronsInLayer)
networkMap = training_controller.network.computeWholeNeuralNetworkHeatMap(test_points_x, test_points_y, len(structure), maxNeuronsInLayer)

#fig2, subfigs = plt.subplots(maxNeuronsInLayer, len(structure), figsize=(4*maxNeuronsInLayer, 4*maxNeuronsInLayer))

subfigs = []
fig2 = plt.figure(figsize=(4*len(structure), 4*maxNeuronsInLayer))

max_weights_in_layer = []
for i in range(len(structure)):
    max_weights_in_layer.append(np.max(training_controller.network.layer[i].weights))
max_weight = np.max(np.array(max_weights_in_layer))

for i in range(len(structure)):
    for j in range(structure[i]):
        subfig = fig2.add_subplot(maxNeuronsInLayer,  len(structure), i+len(structure)*j+1)
        subfig.imshow(networkMap[i+len(structure)*j], cmap='RdYlGn', extent=([-1, 1, -1, 1]), vmin=-1, vmax=1)
        subfigs.append(subfig)
        
        index_of_first_neuron_in_previous_layer = int(np.sum(structure[:i-1]))
        if(i > 0):
            xy1=[1,0]
            xy2=[-1,0]

            for index in range(structure[i-1]):
                weight = training_controller.network.layer[i].weights[j][index]/max_weight
                if weight >= 0:
                    con = ConnectionPatch(xyA=xy2, xyB=xy1, coordsA="data", coordsB="data", axesA=subfig, axesB=subfigs[index_of_first_neuron_in_previous_layer+index], color="red", linestyle='dashed', lw=3.0*weight)
                else:
                    con = ConnectionPatch(xyA=xy2, xyB=xy1, coordsA="data", coordsB="data", axesA=subfig, axesB=subfigs[index_of_first_neuron_in_previous_layer+index], color="green", linestyle='dashed', lw=-3.0*weight)
                
                subfig.add_artist(con)


fig2.canvas.draw()
fig2.canvas.flush_events()

plt.show(block='false')

network.printWeights()
training_controller.network.printWeights()

training_controller.network.printBiases()

network.printBiases()
fig2.savefig(training_controller.folder_path+"heatmap", dpi=500)
fig.savefig(training_controller.folder_path+"final_state", dpi=500)
 No newline at end of file
+10 −7
Original line number Diff line number Diff line
@@ -10,7 +10,7 @@ class NeuralNetwork:
    gradient = []
    learning_rate = 0.01
    l2_regularization = 0.0
    cost_function = 0.0
    loss_function = 0.0

    def __init__(self, structure):
        #Number of layers
@@ -47,16 +47,16 @@ class NeuralNetwork:
        for i in range(self.depth-2,0,-1):
            self.errors[i] = np.multiply(np.matmul(np.transpose(self.layer[i+1].weights),self.errors[i+1]), self.derivativeOfActivationFunction(self.layer[i].neurons_notActivated))
        
    def computeCostFunction(self, output):
        self.cost_function += 0.5*np.dot(self.layer[self.depth-1].neurons - output,self.layer[self.depth-1].neurons - output)
    def computeLossFunction(self, output):
        """Add half the squared error of the last layer to the running loss.

        output: expected value(s), compared against the activations currently
            stored in the last layer's neurons.
        """
        last_layer = self.layer[self.depth-1]
        residual = last_layer.neurons - output
        self.loss_function += 0.5*np.dot(residual, residual)

    def updateGradient(self, batch_size):
        """Accumulate the current sample's contribution to every layer's gradient.

        Walks the layers from the last one down to layer 1. For each, the outer
        product of that layer's error with the previous layer's neuron values
        is added to the weight gradient, and the error itself to the bias
        gradient, both scaled by 1/batch_size.

        batch_size: number of samples the accumulated gradient is averaged over.
        """
        layer_index = self.depth-1
        while layer_index > 0:
            current = self.layer[layer_index]
            delta = self.errors[layer_index]
            inputs = self.layer[layer_index-1].neurons

            current.gradient += (1.0/batch_size)*np.outer(delta, inputs)
            current.gradient_bias += (1.0/batch_size)*delta

            layer_index -= 1
    
    def resetBeforeBatchLearning(self):
        self.cost_function = 0.0
    def resetBeforeEpoch(self):
        self.loss_function = 0.0
        for n in range(self.depth):
            self.layer[n].gradient = np.zeros((self.layer[n].dimension, self.layer[n].prev_layer_dimension))
            self.layer[n].gradient_bias = np.zeros(self.layer[n].dimension)
@@ -82,10 +82,13 @@ class NeuralNetwork:
        self.activation(input)
        return self.layer[self.depth-1].neurons

    def updateCostFunction(self, input, output):
    def updateLossFunction(self, input, output):
        for i in range(len(input)):
            self.activation(input[i])
            self.computeCostFunction(output[i])
            self.computeLossFunction(output[i])
        value = self.loss_function
        self.loss_function = 0.0
        return value

    def computeHeatMap(self, map_coord_x, map_coord_y):
        x = len(map_coord_x)

model/final_state.png

0 → 100644
+976 KiB
Loading image diff...

model/heatmap.png

0 → 100644
+3.53 MiB
Loading image diff...