Loading Data.py +28 −15 Original line number Diff line number Diff line Loading @@ -17,15 +17,22 @@ class Data: input_coord_dimension = 2 output_data_dimension = 1 data_size = 1 training_data_size = 1 all_data = [] training_data_size = 1 training_indices = [] training_data = [] tagged_data = [] labelled_data = [] test_data_size = 1 test_indices = [] test_data = [] def __init__(self, data_size, training_data_size): self.data_size = data_size self.training_data_size = training_data_size self.test_data_size = data_size - training_data_size def norm(self,x,y): return np.sqrt(x**2 + y**2) Loading Loading @@ -66,14 +73,20 @@ class Data: def chooseTrainingData(self): def splitDataToTrainingTest(self): self.training_indices = random.sample(range(self.data_size), self.training_data_size) self.training_data = self.all_data[self.training_indices,:] self.test_indices = np.delete(range(self.data_size), self.training_indices, axis=0) self.test_data = self.all_data[self.test_indices,:] if len(self.test_data) == self.test_data_size: print("OK") def initializeDataSet(self, type): self.createAllData(type) self.addAdditionalInputs() self.chooseTrainingData() self.splitDataToTrainingTest() def createLinear(self): self.all_data = 2*np.random.rand(self.data_size,self.input_coord_dimension)-np.ones((self.data_size,self.input_coord_dimension)) Loading @@ -82,7 +95,7 @@ class Data: self.all_data[i][1] = self.all_data[i][1] else: self.all_data[i][1] = self.all_data[i][1] self.tagged_data = 2*(self.all_data[:,1] < 0)-1 self.labelled_data = 2*(self.all_data[:,1] < 0)-1 def createElipse(self): self.all_data = 2*np.random.rand(self.data_size,self.input_coord_dimension)-np.ones((self.data_size,self.input_coord_dimension)) Loading @@ -90,7 +103,7 @@ class Data: if self.norm((self.all_data[i][0])/1, (self.all_data[i][1])/1) < 0.7: self.all_data[i][0] = 0.5*self.all_data[i][0] self.all_data[i][1] = 0.5*self.all_data[i][1] self.tagged_data = 2*(self.norm((self.all_data[:,0]/1), 
(self.all_data[:,1])/1) < 0.7)-1 self.labelled_data = 2*(self.norm((self.all_data[:,0]/1), (self.all_data[:,1])/1) < 0.7)-1 def createSinus(self): self.all_data = 2*np.random.rand(self.data_size,self.input_coord_dimension)-np.ones((self.data_size,self.input_coord_dimension)) Loading @@ -99,7 +112,7 @@ class Data: self.all_data[i][1] -= 0.0 else: self.all_data[i][1] += 0.0 self.tagged_data = 2*(self.all_data[:,1] < 0.5*np.sin(4*self.all_data[:,0]))-1 self.labelled_data = 2*(self.all_data[:,1] < 0.5*np.sin(4*self.all_data[:,0]))-1 def createHalfElipse(self): self.all_data = 2*np.random.rand(self.data_size,self.input_coord_dimension)-np.ones((self.data_size,self.input_coord_dimension)) Loading @@ -113,7 +126,7 @@ class Data: self.all_data[i][1] += 0.2 else: self.all_data[i][1] = np.maximum(-1, self.all_data[i][1]-0.1) self.tagged_data = 2*(self.norm(self.all_data[:,0], self.all_data[:,1]+1.2) < 1.2)-1 self.labelled_data = 2*(self.norm(self.all_data[:,0], self.all_data[:,1]+1.2) < 1.2)-1 def createSpiral(self): # Define the center of the spiral, as well as the starting angle and the distance between the lines Loading @@ -125,7 +138,7 @@ class Data: # Create an empty list to store the points of the spiral self.all_data = np.zeros((self.data_size,2)) self.tagged_data = np.zeros(self.data_size) self.labelled_data = np.zeros(self.data_size) # Create a loop to generate the points of the spiral for i in range(math.floor(self.data_size/2)): Loading @@ -138,9 +151,9 @@ class Data: # Add the point to the list of points self.all_data[2*i] = [x,y] self.tagged_data[2*i] = -1 self.labelled_data[2*i] = -1 self.all_data[2*i+1] = [x_2,y_2] self.tagged_data[2*i+1] = 1 self.labelled_data[2*i+1] = 1 # Increase the angle and distance for the next iteration angle += 6*math.pi/self.data_size Loading @@ -148,7 +161,7 @@ class Data: def createTwoElipses(self): self.all_data = 2*np.random.rand(self.data_size,self.input_coord_dimension)-np.ones((self.data_size,self.input_coord_dimension)) 
self.tagged_data = np.zeros(self.data_size) self.labelled_data = np.zeros(self.data_size) center_x1 = 0.4 center_y1 = 0.4 Loading @@ -161,10 +174,10 @@ class Data: if self.norm((self.all_data[i][0]-center_x1)/1, (self.all_data[i][1]-center_y1)/1) < range1: self.all_data[i][0] = 0.8*(self.all_data[i][0]-center_x1) + center_x1 self.all_data[i][1] = 0.8*(self.all_data[i][1]-center_y1) + center_y1 self.tagged_data[i] = 1 self.labelled_data[i] = 1 if self.norm((self.all_data[i][0]-center_x2)/1, (self.all_data[i][1]-center_y2)/1) < range2: self.all_data[i][0] = 0.8*(self.all_data[i][0]-center_x2) + center_x2 self.all_data[i][1] = 0.8*(self.all_data[i][1]-center_y2) + center_y2 self.tagged_data[i] = 1 self.tagged_data = 2*self.tagged_data-1 self.labelled_data[i] = 1 self.labelled_data = 2*self.labelled_data-1 LearningController.py +148 −73 Original line number Diff line number Diff line Loading @@ -4,32 +4,94 @@ from Data import Data import matplotlib as mpl import matplotlib.pyplot as plt import sys import os import random from matplotlib.patches import ConnectionPatch class LearningController: class TrainingController: folder_path = "model/" learning_rate = 0.1 l2_regularization = 0.0 batch_size = 30 max_epochs = 2000 max_epochs = 1000 train_loss = 1.0 train_loss_baseline = 1.0 test_loss = 1.0 test_loss_baseline = 1.0 total_loss = 1.0 total_loss_baseline = 1.0 train_loss_history = [] test_loss_history = [] def __init__(self, network): self.network = network network.setLearningParameters(self.learning_rate, self.l2_regularization) self.network.setLearningParameters(self.learning_rate, self.l2_regularization) def trainOnBatch(self, batch, tags, iterationsOnOneBatch): batch_size = len(batch) for n in range(iterationsOnOneBatch): self.network.resetBeforeBatchLearning() for j in range(batch_size): def trainOnBatch(self, batch, labels): for j in range(len(batch)): self.network.activation(batch[j]) #print("Po aktivaci:\n") #network.printNetwork() 
self.network.calculateErrors(tags[j]) self.network.updateGradient(batch_size) self.network.calculateErrors(labels[j]) self.network.updateGradient(len(batch)) def trainOneEpochBGD(self, dataset: Data): self.network.resetBeforeEpoch() shuffled_indexes = np.arange(dataset.training_data_size) np.random.shuffle(shuffled_indexes) number_of_full_batches = dataset.training_data_size // self.batch_size for batch_number in range(number_of_full_batches+1): if(batch_number != number_of_full_batches): batch_indexes = shuffled_indexes[batch_number*self.batch_size:(batch_number+1)*self.batch_size] else: batch_indexes = shuffled_indexes[batch_number*self.batch_size:] batch = dataset.all_data[batch_indexes,:] labels = dataset.labelled_data[batch_indexes] self.trainOnBatch(batch, labels) self.network.updateWeights() def trainOneEpochMiniBGD(self, dataset: Data): shuffled_indexes = np.arange(dataset.training_data_size) np.random.shuffle(shuffled_indexes) number_of_full_batches = dataset.training_data_size // self.batch_size for batch_number in range(number_of_full_batches+1): self.network.resetBeforeEpoch() if(batch_number != number_of_full_batches): batch_indexes = shuffled_indexes[batch_number*self.batch_size:(batch_number+1)*self.batch_size] else: batch_indexes = shuffled_indexes[batch_number*self.batch_size:] batch = dataset.all_data[batch_indexes,:] labels = dataset.labelled_data[batch_indexes] self.trainOnBatch(batch, labels) self.network.updateWeights() def trainOneEpochSGD(self, dataset: Data, multiple_passes): batch_indexes = random.sample(dataset.training_indices, training_controller.batch_size) batch = dataset.all_data[batch_indexes,:] labels = dataset.labelled_data[batch_indexes] for n in range(multiple_passes): self.network.resetBeforeEpoch() self.trainOnBatch(batch, labels) self.network.updateWeights() def trainOnDataset(self,dataset,number_of_epochs): for i in range(number_of_epochs): self.trainOneEpoch(dataset) # do functions for graphics def saveTrainedModel(self): 
pass #TODO saving plots, history in txt, weights in txt and numpy for network loading data_size = 1000 Loading @@ -40,87 +102,75 @@ if(data.initializeDataSet("spiral") == False): print("Dataset not defined") sys.exit(1) structure = [data.input_data_dimension,8,4,4,2,data.output_data_dimension] structure = [data.input_data_dimension,8,8,8,data.output_data_dimension] network = NeuralNetwork(structure) learning_controller = LearningController(network) plt.ion() fig, (tag,cost) = plt.subplots(1,2, figsize=(10, 5)) training_controller = TrainingController(network) os.makedirs(training_controller.folder_path, exist_ok=True) costFunction_training = [] costFunction_testing = [] plt.ion() fig, (state,cost) = plt.subplots(1,2, figsize=(10, 5)) test_points_x = np.linspace(-1,1,101) test_points_y = np.linspace(1,-1,101) tagged_data_A = np.zeros((len(data.all_data), data.input_data_dimension)) tagged_data_B = np.zeros((len(data.all_data), data.input_data_dimension)) labelled_data_A = np.zeros((data_size, data.input_data_dimension)) labelled_data_B = np.zeros((data_size, data.input_data_dimension)) for k in range(data_size): if data.tagged_data[k] > 0: tagged_data_A[k] = data.all_data[k] if data.labelled_data[k] > 0: labelled_data_A[k] = data.all_data[k] else: tagged_data_B[k] = data.all_data[k] labelled_data_B[k] = data.all_data[k] tagged_data_A = tagged_data_A[~np.all(tagged_data_A == 0, axis=1)] tagged_data_B = tagged_data_B[~np.all(tagged_data_B == 0, axis=1)] labelled_data_A = labelled_data_A[~np.all(labelled_data_A == 0, axis=1)] labelled_data_B = labelled_data_B[~np.all(labelled_data_B == 0, axis=1)] epoch = 0 epsilon = 0.01 epsilon = 0.05 costFunction = 1.0 costFunction_training_value = 1.0 while epoch < learning_controller.max_epochs and costFunction_training_value >= epsilon: while epoch < training_controller.max_epochs and training_controller.train_loss >= epsilon: print("Epoch "+str(epoch)+":") batch_indexes = random.sample(range(data.training_data_size), 
learning_controller.batch_size)#np.random.randint(0, training_data_size, batch_size) batch_indexes = random.sample(data.training_indices, learning_controller.batch_size) batch = data.all_data[batch_indexes,:] tags = data.tagged_data[batch_indexes] learning_controller.trainOnBatch(batch, tags, 10) network.updateCostFunction(data.all_data[data.training_indices], data.tagged_data[data.training_indices]) if epoch == 0: cost_function_training_baseline = network.cost_function costFunction_training.append((epoch, network.cost_function/cost_function_training_baseline)) training_value = network.cost_function training_controller.trainOneEpochSGD(data,10) #training_controller.trainOneEpochMiniBGD(data) network.updateCostFunction(np.delete(data.all_data, data.training_indices, axis=0), np.delete(data.tagged_data, data.training_indices)) training_controller.train_loss = training_controller.network.updateLossFunction(data.all_data[data.training_indices], data.labelled_data[data.training_indices]) training_controller.test_loss = training_controller.network.updateLossFunction(data.all_data[data.test_indices], data.labelled_data[data.test_indices]) training_controller.total_loss = training_controller.train_loss+training_controller.test_loss if epoch == 0: cost_function_testing_baseline = network.cost_function - cost_function_training_baseline training_controller.train_loss_baseline = training_controller.train_loss training_controller.test_loss_baseline = training_controller.test_loss training_controller.total_loss_baseline = training_controller.train_loss_baseline + training_controller.test_loss_baseline cost_function_baseline = cost_function_training_baseline + cost_function_testing_baseline training_controller.train_loss /= training_controller.train_loss_baseline training_controller.test_loss /= training_controller.test_loss_baseline training_controller.total_loss /= training_controller.total_loss_baseline costFunction_testing.append((epoch, (network.cost_function - 
training_value)/cost_function_testing_baseline)) training_controller.train_loss_history.append((epoch, training_controller.train_loss)) training_controller.test_loss_history.append((epoch, (training_controller.test_loss))) print("Cost function: "+str(network.cost_function/cost_function_baseline)) print("Training cost function: "+str(training_value/cost_function_training_baseline)) print("Cost function: "+str(training_controller.total_loss)) print("Training cost function: "+str(training_controller.train_loss)) costFunction = network.cost_function/cost_function_baseline costFunction_training_value = training_value/cost_function_training_baseline cost.plot(*zip(*costFunction_training), 'k-', label='Training data') cost.plot(*zip(*costFunction_testing), 'b-', label='Testing data') cost.plot(*zip(*training_controller.train_loss_history), 'k-', label='Training data') cost.plot(*zip(*training_controller.test_loss_history), 'b-', label='Testing data') if epoch % 20 == 0: colormap = network.computeHeatMap(test_points_x, test_points_y) colormap = training_controller.network.computeHeatMap(test_points_x, test_points_y) #heatmap = proc.imshow(colormap, cmap='RdYlGn', extent=([-1, 1, -1, 1]), interpolation='bilinear', vmin=-1, vmax=1) tag.imshow(colormap, cmap='RdYlGn', extent=([-1, 1, -1, 1]), interpolation='bilinear', vmin=-1, vmax=1) tag.plot(tagged_data_A[:,0], tagged_data_A[:,1], 'go', label="Class A") tag.plot(tagged_data_B[:,0], tagged_data_B[:,1], 'ro', label="Class B") tag.plot(data.training_data[:,0], data.training_data[:,1], 'k.', label="Training data") state.imshow(colormap, cmap='RdYlGn', extent=([-1, 1, -1, 1]), interpolation='bilinear', vmin=-1, vmax=1) state.plot(labelled_data_A[:,0], labelled_data_A[:,1], 'go', label="Class A") state.plot(labelled_data_B[:,0], labelled_data_B[:,1], 'ro', label="Class B") state.plot(data.training_data[:,0], data.training_data[:,1], 'k.', label="Training data") if(epoch == 0): tag.legend(loc='lower center', bbox_to_anchor=(0.5, 
1.0), ncols=3) state.legend(loc='lower center', bbox_to_anchor=(0.5, 1.0), ncols=3) #fig.colorbar(heatmap, orientation="horizontal") cost.legend(loc='lower center', bbox_to_anchor=(0.5, 1.0), ncols=2) Loading @@ -129,32 +179,57 @@ while epoch < learning_controller.max_epochs and costFunction_training_value >= epoch += 1 colormap = network.computeHeatMap(test_points_x, test_points_y) colormap = training_controller.network.computeHeatMap(test_points_x, test_points_y) #heatmap = proc.imshow(colormap, cmap='RdYlGn', extent=([-1, 1, -1, 1]), interpolation='bilinear', vmin=-1, vmax=1) tag.imshow(colormap, cmap='RdYlGn', extent=([-1, 1, -1, 1]), interpolation='bilinear', vmin=-1, vmax=1) tag.plot(tagged_data_A[:,0], tagged_data_A[:,1], 'go', label="Class A") tag.plot(tagged_data_B[:,0], tagged_data_B[:,1], 'ro', label="Class B") tag.plot(data.training_data[:,0], data.training_data[:,1], 'k.', label="Training data") state.imshow(colormap, cmap='RdYlGn', extent=([-1, 1, -1, 1]), interpolation='bilinear', vmin=-1, vmax=1) state.plot(labelled_data_A[:,0], labelled_data_A[:,1], 'go', label="Class A") state.plot(labelled_data_B[:,0], labelled_data_B[:,1], 'ro', label="Class B") state.plot(data.training_data[:,0], data.training_data[:,1], 'k.', label="Training data") numberOfNeurons = np.sum(structure) maxNeuronsInLayer = np.max(structure) networkMap = network.computeWholeNeuralNetworkHeatMap(test_points_x, test_points_y, len(structure), maxNeuronsInLayer) networkMap = training_controller.network.computeWholeNeuralNetworkHeatMap(test_points_x, test_points_y, len(structure), maxNeuronsInLayer) #fig2, subfigs = plt.subplots(maxNeuronsInLayer, len(structure), figsize=(4*maxNeuronsInLayer, 4*maxNeuronsInLayer)) subfigs = [] fig2 = plt.figure(figsize=(4*len(structure), 4*maxNeuronsInLayer)) max_weights_in_layer = [] for i in range(len(structure)): max_weights_in_layer.append(np.max(training_controller.network.layer[i].weights)) max_weight = np.max(np.array(max_weights_in_layer)) for 
i in range(len(structure)): for j in range(structure[i]): subfig = fig2.add_subplot(maxNeuronsInLayer, len(structure), i+len(structure)*j+1) subfig.imshow(networkMap[i+len(structure)*j], cmap='RdYlGn', extent=([-1, 1, -1, 1]), vmin=-1, vmax=1) subfigs.append(subfig) index_of_first_neuron_in_previous_layer = int(np.sum(structure[:i-1])) if(i > 0): xy1=[1,0] xy2=[-1,0] for index in range(structure[i-1]): weight = training_controller.network.layer[i].weights[j][index]/max_weight if weight >= 0: con = ConnectionPatch(xyA=xy2, xyB=xy1, coordsA="data", coordsB="data", axesA=subfig, axesB=subfigs[index_of_first_neuron_in_previous_layer+index], color="red", linestyle='dashed', lw=3.0*weight) else: con = ConnectionPatch(xyA=xy2, xyB=xy1, coordsA="data", coordsB="data", axesA=subfig, axesB=subfigs[index_of_first_neuron_in_previous_layer+index], color="green", linestyle='dashed', lw=-3.0*weight) subfig.add_artist(con) fig2.canvas.draw() fig2.canvas.flush_events() plt.show(block='false') network.printWeights() training_controller.network.printWeights() training_controller.network.printBiases() network.printBiases() fig2.savefig(training_controller.folder_path+"heatmap", dpi=500) fig.savefig(training_controller.folder_path+"final_state", dpi=500) No newline at end of file NeuralNetwork.py +10 −7 Original line number Diff line number Diff line Loading @@ -10,7 +10,7 @@ class NeuralNetwork: gradient = [] learning_rate = 0.01 l2_regularization = 0.0 cost_function = 0.0 loss_function = 0.0 def __init__(self, structure): #Number of layers Loading Loading @@ -47,16 +47,16 @@ class NeuralNetwork: for i in range(self.depth-2,0,-1): self.errors[i] = np.multiply(np.matmul(np.transpose(self.layer[i+1].weights),self.errors[i+1]), self.derivativeOfActivationFunction(self.layer[i].neurons_notActivated)) def computeCostFunction(self, output): self.cost_function += 0.5*np.dot(self.layer[self.depth-1].neurons - output,self.layer[self.depth-1].neurons - output) def computeLossFunction(self, 
output): self.loss_function += 0.5*np.dot(self.layer[self.depth-1].neurons - output,self.layer[self.depth-1].neurons - output) def updateGradient(self, batch_size): for n in range(self.depth-1, 0, -1): self.layer[n].gradient += (1.0/batch_size)*np.outer(self.errors[n], self.layer[n-1].neurons) self.layer[n].gradient_bias += (1.0/batch_size)*self.errors[n] def resetBeforeBatchLearning(self): self.cost_function = 0.0 def resetBeforeEpoch(self): self.loss_function = 0.0 for n in range(self.depth): self.layer[n].gradient = np.zeros((self.layer[n].dimension, self.layer[n].prev_layer_dimension)) self.layer[n].gradient_bias = np.zeros(self.layer[n].dimension) Loading @@ -82,10 +82,13 @@ class NeuralNetwork: self.activation(input) return self.layer[self.depth-1].neurons def updateCostFunction(self, input, output): def updateLossFunction(self, input, output): for i in range(len(input)): self.activation(input[i]) self.computeCostFunction(output[i]) self.computeLossFunction(output[i]) value = self.loss_function self.loss_function = 0.0 return value def computeHeatMap(self, map_coord_x, map_coord_y): x = len(map_coord_x) Loading model/final_state.png 0 → 100644 +976 KiB Loading image diff... model/heatmap.png 0 → 100644 +3.53 MiB Loading image diff... Loading
Data.py +28 −15 Original line number Diff line number Diff line Loading @@ -17,15 +17,22 @@ class Data: input_coord_dimension = 2 output_data_dimension = 1 data_size = 1 training_data_size = 1 all_data = [] training_data_size = 1 training_indices = [] training_data = [] tagged_data = [] labelled_data = [] test_data_size = 1 test_indices = [] test_data = [] def __init__(self, data_size, training_data_size): self.data_size = data_size self.training_data_size = training_data_size self.test_data_size = data_size - training_data_size def norm(self,x,y): return np.sqrt(x**2 + y**2) Loading Loading @@ -66,14 +73,20 @@ class Data: def chooseTrainingData(self): def splitDataToTrainingTest(self): self.training_indices = random.sample(range(self.data_size), self.training_data_size) self.training_data = self.all_data[self.training_indices,:] self.test_indices = np.delete(range(self.data_size), self.training_indices, axis=0) self.test_data = self.all_data[self.test_indices,:] if len(self.test_data) == self.test_data_size: print("OK") def initializeDataSet(self, type): self.createAllData(type) self.addAdditionalInputs() self.chooseTrainingData() self.splitDataToTrainingTest() def createLinear(self): self.all_data = 2*np.random.rand(self.data_size,self.input_coord_dimension)-np.ones((self.data_size,self.input_coord_dimension)) Loading @@ -82,7 +95,7 @@ class Data: self.all_data[i][1] = self.all_data[i][1] else: self.all_data[i][1] = self.all_data[i][1] self.tagged_data = 2*(self.all_data[:,1] < 0)-1 self.labelled_data = 2*(self.all_data[:,1] < 0)-1 def createElipse(self): self.all_data = 2*np.random.rand(self.data_size,self.input_coord_dimension)-np.ones((self.data_size,self.input_coord_dimension)) Loading @@ -90,7 +103,7 @@ class Data: if self.norm((self.all_data[i][0])/1, (self.all_data[i][1])/1) < 0.7: self.all_data[i][0] = 0.5*self.all_data[i][0] self.all_data[i][1] = 0.5*self.all_data[i][1] self.tagged_data = 2*(self.norm((self.all_data[:,0]/1), (self.all_data[:,1])/1) < 
0.7)-1 self.labelled_data = 2*(self.norm((self.all_data[:,0]/1), (self.all_data[:,1])/1) < 0.7)-1 def createSinus(self): self.all_data = 2*np.random.rand(self.data_size,self.input_coord_dimension)-np.ones((self.data_size,self.input_coord_dimension)) Loading @@ -99,7 +112,7 @@ class Data: self.all_data[i][1] -= 0.0 else: self.all_data[i][1] += 0.0 self.tagged_data = 2*(self.all_data[:,1] < 0.5*np.sin(4*self.all_data[:,0]))-1 self.labelled_data = 2*(self.all_data[:,1] < 0.5*np.sin(4*self.all_data[:,0]))-1 def createHalfElipse(self): self.all_data = 2*np.random.rand(self.data_size,self.input_coord_dimension)-np.ones((self.data_size,self.input_coord_dimension)) Loading @@ -113,7 +126,7 @@ class Data: self.all_data[i][1] += 0.2 else: self.all_data[i][1] = np.maximum(-1, self.all_data[i][1]-0.1) self.tagged_data = 2*(self.norm(self.all_data[:,0], self.all_data[:,1]+1.2) < 1.2)-1 self.labelled_data = 2*(self.norm(self.all_data[:,0], self.all_data[:,1]+1.2) < 1.2)-1 def createSpiral(self): # Define the center of the spiral, as well as the starting angle and the distance between the lines Loading @@ -125,7 +138,7 @@ class Data: # Create an empty list to store the points of the spiral self.all_data = np.zeros((self.data_size,2)) self.tagged_data = np.zeros(self.data_size) self.labelled_data = np.zeros(self.data_size) # Create a loop to generate the points of the spiral for i in range(math.floor(self.data_size/2)): Loading @@ -138,9 +151,9 @@ class Data: # Add the point to the list of points self.all_data[2*i] = [x,y] self.tagged_data[2*i] = -1 self.labelled_data[2*i] = -1 self.all_data[2*i+1] = [x_2,y_2] self.tagged_data[2*i+1] = 1 self.labelled_data[2*i+1] = 1 # Increase the angle and distance for the next iteration angle += 6*math.pi/self.data_size Loading @@ -148,7 +161,7 @@ class Data: def createTwoElipses(self): self.all_data = 2*np.random.rand(self.data_size,self.input_coord_dimension)-np.ones((self.data_size,self.input_coord_dimension)) self.tagged_data = 
np.zeros(self.data_size) self.labelled_data = np.zeros(self.data_size) center_x1 = 0.4 center_y1 = 0.4 Loading @@ -161,10 +174,10 @@ class Data: if self.norm((self.all_data[i][0]-center_x1)/1, (self.all_data[i][1]-center_y1)/1) < range1: self.all_data[i][0] = 0.8*(self.all_data[i][0]-center_x1) + center_x1 self.all_data[i][1] = 0.8*(self.all_data[i][1]-center_y1) + center_y1 self.tagged_data[i] = 1 self.labelled_data[i] = 1 if self.norm((self.all_data[i][0]-center_x2)/1, (self.all_data[i][1]-center_y2)/1) < range2: self.all_data[i][0] = 0.8*(self.all_data[i][0]-center_x2) + center_x2 self.all_data[i][1] = 0.8*(self.all_data[i][1]-center_y2) + center_y2 self.tagged_data[i] = 1 self.tagged_data = 2*self.tagged_data-1 self.labelled_data[i] = 1 self.labelled_data = 2*self.labelled_data-1
LearningController.py +148 −73 Original line number Diff line number Diff line Loading @@ -4,32 +4,94 @@ from Data import Data import matplotlib as mpl import matplotlib.pyplot as plt import sys import os import random from matplotlib.patches import ConnectionPatch class LearningController: class TrainingController: folder_path = "model/" learning_rate = 0.1 l2_regularization = 0.0 batch_size = 30 max_epochs = 2000 max_epochs = 1000 train_loss = 1.0 train_loss_baseline = 1.0 test_loss = 1.0 test_loss_baseline = 1.0 total_loss = 1.0 total_loss_baseline = 1.0 train_loss_history = [] test_loss_history = [] def __init__(self, network): self.network = network network.setLearningParameters(self.learning_rate, self.l2_regularization) self.network.setLearningParameters(self.learning_rate, self.l2_regularization) def trainOnBatch(self, batch, tags, iterationsOnOneBatch): batch_size = len(batch) for n in range(iterationsOnOneBatch): self.network.resetBeforeBatchLearning() for j in range(batch_size): def trainOnBatch(self, batch, labels): for j in range(len(batch)): self.network.activation(batch[j]) #print("Po aktivaci:\n") #network.printNetwork() self.network.calculateErrors(tags[j]) self.network.updateGradient(batch_size) self.network.calculateErrors(labels[j]) self.network.updateGradient(len(batch)) def trainOneEpochBGD(self, dataset: Data): self.network.resetBeforeEpoch() shuffled_indexes = np.arange(dataset.training_data_size) np.random.shuffle(shuffled_indexes) number_of_full_batches = dataset.training_data_size // self.batch_size for batch_number in range(number_of_full_batches+1): if(batch_number != number_of_full_batches): batch_indexes = shuffled_indexes[batch_number*self.batch_size:(batch_number+1)*self.batch_size] else: batch_indexes = shuffled_indexes[batch_number*self.batch_size:] batch = dataset.all_data[batch_indexes,:] labels = dataset.labelled_data[batch_indexes] self.trainOnBatch(batch, labels) self.network.updateWeights() def trainOneEpochMiniBGD(self, 
dataset: Data): shuffled_indexes = np.arange(dataset.training_data_size) np.random.shuffle(shuffled_indexes) number_of_full_batches = dataset.training_data_size // self.batch_size for batch_number in range(number_of_full_batches+1): self.network.resetBeforeEpoch() if(batch_number != number_of_full_batches): batch_indexes = shuffled_indexes[batch_number*self.batch_size:(batch_number+1)*self.batch_size] else: batch_indexes = shuffled_indexes[batch_number*self.batch_size:] batch = dataset.all_data[batch_indexes,:] labels = dataset.labelled_data[batch_indexes] self.trainOnBatch(batch, labels) self.network.updateWeights() def trainOneEpochSGD(self, dataset: Data, multiple_passes): batch_indexes = random.sample(dataset.training_indices, training_controller.batch_size) batch = dataset.all_data[batch_indexes,:] labels = dataset.labelled_data[batch_indexes] for n in range(multiple_passes): self.network.resetBeforeEpoch() self.trainOnBatch(batch, labels) self.network.updateWeights() def trainOnDataset(self,dataset,number_of_epochs): for i in range(number_of_epochs): self.trainOneEpoch(dataset) # do functions for graphics def saveTrainedModel(self): pass #TODO saving plots, history in txt, weights in txt and numpy for network loading data_size = 1000 Loading @@ -40,87 +102,75 @@ if(data.initializeDataSet("spiral") == False): print("Dataset not defined") sys.exit(1) structure = [data.input_data_dimension,8,4,4,2,data.output_data_dimension] structure = [data.input_data_dimension,8,8,8,data.output_data_dimension] network = NeuralNetwork(structure) learning_controller = LearningController(network) plt.ion() fig, (tag,cost) = plt.subplots(1,2, figsize=(10, 5)) training_controller = TrainingController(network) os.makedirs(training_controller.folder_path, exist_ok=True) costFunction_training = [] costFunction_testing = [] plt.ion() fig, (state,cost) = plt.subplots(1,2, figsize=(10, 5)) test_points_x = np.linspace(-1,1,101) test_points_y = np.linspace(1,-1,101) tagged_data_A = 
np.zeros((len(data.all_data), data.input_data_dimension)) tagged_data_B = np.zeros((len(data.all_data), data.input_data_dimension)) labelled_data_A = np.zeros((data_size, data.input_data_dimension)) labelled_data_B = np.zeros((data_size, data.input_data_dimension)) for k in range(data_size): if data.tagged_data[k] > 0: tagged_data_A[k] = data.all_data[k] if data.labelled_data[k] > 0: labelled_data_A[k] = data.all_data[k] else: tagged_data_B[k] = data.all_data[k] labelled_data_B[k] = data.all_data[k] tagged_data_A = tagged_data_A[~np.all(tagged_data_A == 0, axis=1)] tagged_data_B = tagged_data_B[~np.all(tagged_data_B == 0, axis=1)] labelled_data_A = labelled_data_A[~np.all(labelled_data_A == 0, axis=1)] labelled_data_B = labelled_data_B[~np.all(labelled_data_B == 0, axis=1)] epoch = 0 epsilon = 0.01 epsilon = 0.05 costFunction = 1.0 costFunction_training_value = 1.0 while epoch < learning_controller.max_epochs and costFunction_training_value >= epsilon: while epoch < training_controller.max_epochs and training_controller.train_loss >= epsilon: print("Epoch "+str(epoch)+":") batch_indexes = random.sample(range(data.training_data_size), learning_controller.batch_size)#np.random.randint(0, training_data_size, batch_size) batch_indexes = random.sample(data.training_indices, learning_controller.batch_size) batch = data.all_data[batch_indexes,:] tags = data.tagged_data[batch_indexes] learning_controller.trainOnBatch(batch, tags, 10) network.updateCostFunction(data.all_data[data.training_indices], data.tagged_data[data.training_indices]) if epoch == 0: cost_function_training_baseline = network.cost_function costFunction_training.append((epoch, network.cost_function/cost_function_training_baseline)) training_value = network.cost_function training_controller.trainOneEpochSGD(data,10) #training_controller.trainOneEpochMiniBGD(data) network.updateCostFunction(np.delete(data.all_data, data.training_indices, axis=0), np.delete(data.tagged_data, data.training_indices)) 
training_controller.train_loss = training_controller.network.updateLossFunction(data.all_data[data.training_indices], data.labelled_data[data.training_indices]) training_controller.test_loss = training_controller.network.updateLossFunction(data.all_data[data.test_indices], data.labelled_data[data.test_indices]) training_controller.total_loss = training_controller.train_loss+training_controller.test_loss if epoch == 0: cost_function_testing_baseline = network.cost_function - cost_function_training_baseline training_controller.train_loss_baseline = training_controller.train_loss training_controller.test_loss_baseline = training_controller.test_loss training_controller.total_loss_baseline = training_controller.train_loss_baseline + training_controller.test_loss_baseline cost_function_baseline = cost_function_training_baseline + cost_function_testing_baseline training_controller.train_loss /= training_controller.train_loss_baseline training_controller.test_loss /= training_controller.test_loss_baseline training_controller.total_loss /= training_controller.total_loss_baseline costFunction_testing.append((epoch, (network.cost_function - training_value)/cost_function_testing_baseline)) training_controller.train_loss_history.append((epoch, training_controller.train_loss)) training_controller.test_loss_history.append((epoch, (training_controller.test_loss))) print("Cost function: "+str(network.cost_function/cost_function_baseline)) print("Training cost function: "+str(training_value/cost_function_training_baseline)) print("Cost function: "+str(training_controller.total_loss)) print("Training cost function: "+str(training_controller.train_loss)) costFunction = network.cost_function/cost_function_baseline costFunction_training_value = training_value/cost_function_training_baseline cost.plot(*zip(*costFunction_training), 'k-', label='Training data') cost.plot(*zip(*costFunction_testing), 'b-', label='Testing data') cost.plot(*zip(*training_controller.train_loss_history), 'k-', 
label='Training data') cost.plot(*zip(*training_controller.test_loss_history), 'b-', label='Testing data') if epoch % 20 == 0: colormap = network.computeHeatMap(test_points_x, test_points_y) colormap = training_controller.network.computeHeatMap(test_points_x, test_points_y) #heatmap = proc.imshow(colormap, cmap='RdYlGn', extent=([-1, 1, -1, 1]), interpolation='bilinear', vmin=-1, vmax=1) tag.imshow(colormap, cmap='RdYlGn', extent=([-1, 1, -1, 1]), interpolation='bilinear', vmin=-1, vmax=1) tag.plot(tagged_data_A[:,0], tagged_data_A[:,1], 'go', label="Class A") tag.plot(tagged_data_B[:,0], tagged_data_B[:,1], 'ro', label="Class B") tag.plot(data.training_data[:,0], data.training_data[:,1], 'k.', label="Training data") state.imshow(colormap, cmap='RdYlGn', extent=([-1, 1, -1, 1]), interpolation='bilinear', vmin=-1, vmax=1) state.plot(labelled_data_A[:,0], labelled_data_A[:,1], 'go', label="Class A") state.plot(labelled_data_B[:,0], labelled_data_B[:,1], 'ro', label="Class B") state.plot(data.training_data[:,0], data.training_data[:,1], 'k.', label="Training data") if(epoch == 0): tag.legend(loc='lower center', bbox_to_anchor=(0.5, 1.0), ncols=3) state.legend(loc='lower center', bbox_to_anchor=(0.5, 1.0), ncols=3) #fig.colorbar(heatmap, orientation="horizontal") cost.legend(loc='lower center', bbox_to_anchor=(0.5, 1.0), ncols=2) Loading @@ -129,32 +179,57 @@ while epoch < learning_controller.max_epochs and costFunction_training_value >= epoch += 1 colormap = network.computeHeatMap(test_points_x, test_points_y) colormap = training_controller.network.computeHeatMap(test_points_x, test_points_y) #heatmap = proc.imshow(colormap, cmap='RdYlGn', extent=([-1, 1, -1, 1]), interpolation='bilinear', vmin=-1, vmax=1) tag.imshow(colormap, cmap='RdYlGn', extent=([-1, 1, -1, 1]), interpolation='bilinear', vmin=-1, vmax=1) tag.plot(tagged_data_A[:,0], tagged_data_A[:,1], 'go', label="Class A") tag.plot(tagged_data_B[:,0], tagged_data_B[:,1], 'ro', label="Class B") 
# ---------------------------------------------------------------------------
# Final visualisation, run once after the training loop has finished.
# Relies on `colormap` computed just above from the trained network.
# ---------------------------------------------------------------------------

# Decision surface of the trained network with the labelled samples on top.
state.imshow(colormap, cmap='RdYlGn', extent=([-1, 1, -1, 1]), interpolation='bilinear', vmin=-1, vmax=1)
state.plot(labelled_data_A[:,0], labelled_data_A[:,1], 'go', label="Class A")
state.plot(labelled_data_B[:,0], labelled_data_B[:,1], 'ro', label="Class B")
state.plot(data.training_data[:,0], data.training_data[:,1], 'k.', label="Training data")

numberOfNeurons = np.sum(structure)
maxNeuronsInLayer = np.max(structure)
number_of_layers = len(structure)
networkMap = training_controller.network.computeWholeNeuralNetworkHeatMap(test_points_x, test_points_y, number_of_layers, maxNeuronsInLayer)

# One subplot per neuron: columns are layers, rows are neurons within a layer.
subfigs = []
fig2 = plt.figure(figsize=(4*number_of_layers, 4*maxNeuronsInLayer))

# Normalise connection widths by the largest weight *magnitude*. Using the
# largest signed weight (as before) flips the colours when every weight is
# negative and lets strongly negative weights exceed the intended maximum
# line width of 3.
max_weight = max(
    np.max(np.abs(training_controller.network.layer[i].weights))
    for i in range(number_of_layers)
)
if max_weight == 0:
    # All weights are zero: avoid dividing by zero, every line gets width 0.
    max_weight = 1.0

# Anchor points (in data coordinates) on the right edge of the source neuron
# plot and on the left edge of the target neuron plot.
xy1 = [1, 0]
xy2 = [-1, 0]

for i in range(number_of_layers):
    for j in range(structure[i]):
        grid_index = i + number_of_layers*j
        subfig = fig2.add_subplot(maxNeuronsInLayer, number_of_layers, grid_index+1)
        subfig.imshow(networkMap[grid_index], cmap='RdYlGn', extent=([-1, 1, -1, 1]), vmin=-1, vmax=1)
        subfigs.append(subfig)

        if i == 0:
            # The first layer has no previous layer to connect to.
            continue

        # `subfigs` is filled layer by layer, so the axes of layer i-1 start
        # right after all neurons of layers 0 .. i-2.
        index_of_first_neuron_in_previous_layer = int(np.sum(structure[:i-1]))
        for index in range(structure[i-1]):
            weight = training_controller.network.layer[i].weights[j][index]/max_weight
            # Red for non-negative weights, green for negative ones; the line
            # width encodes the relative magnitude.
            con = ConnectionPatch(xyA=xy2, xyB=xy1, coordsA="data", coordsB="data",
                                  axesA=subfig,
                                  axesB=subfigs[index_of_first_neuron_in_previous_layer+index],
                                  color="red" if weight >= 0 else "green",
                                  linestyle='dashed', lw=3.0*abs(weight))
            subfig.add_artist(con)

fig2.canvas.draw()
fig2.canvas.flush_events()
# `block` must be a real boolean: the string 'false' is truthy and therefore
# made this call block until every window was closed.
# NOTE(review): with a non-blocking show the windows close when the script
# exits; the figures are still written to disk below. Confirm this is wanted.
plt.show(block=False)

training_controller.network.printWeights()
training_controller.network.printBiases()

fig2.savefig(training_controller.folder_path+"heatmap", dpi=500)
fig.savefig(training_controller.folder_path+"final_state", dpi=500)
NeuralNetwork.py +10 −7 Original line number Diff line number Diff line Loading @@ -10,7 +10,7 @@ class NeuralNetwork: gradient = [] learning_rate = 0.01 l2_regularization = 0.0 cost_function = 0.0 loss_function = 0.0 def __init__(self, structure): #Number of layers Loading Loading @@ -47,16 +47,16 @@ class NeuralNetwork: for i in range(self.depth-2,0,-1): self.errors[i] = np.multiply(np.matmul(np.transpose(self.layer[i+1].weights),self.errors[i+1]), self.derivativeOfActivationFunction(self.layer[i].neurons_notActivated)) def computeCostFunction(self, output): self.cost_function += 0.5*np.dot(self.layer[self.depth-1].neurons - output,self.layer[self.depth-1].neurons - output) def computeLossFunction(self, output): self.loss_function += 0.5*np.dot(self.layer[self.depth-1].neurons - output,self.layer[self.depth-1].neurons - output) def updateGradient(self, batch_size): for n in range(self.depth-1, 0, -1): self.layer[n].gradient += (1.0/batch_size)*np.outer(self.errors[n], self.layer[n-1].neurons) self.layer[n].gradient_bias += (1.0/batch_size)*self.errors[n] def resetBeforeBatchLearning(self): self.cost_function = 0.0 def resetBeforeEpoch(self): self.loss_function = 0.0 for n in range(self.depth): self.layer[n].gradient = np.zeros((self.layer[n].dimension, self.layer[n].prev_layer_dimension)) self.layer[n].gradient_bias = np.zeros(self.layer[n].dimension) Loading @@ -82,10 +82,13 @@ class NeuralNetwork: self.activation(input) return self.layer[self.depth-1].neurons def updateCostFunction(self, input, output): def updateLossFunction(self, input, output): for i in range(len(input)): self.activation(input[i]) self.computeCostFunction(output[i]) self.computeLossFunction(output[i]) value = self.loss_function self.loss_function = 0.0 return value def computeHeatMap(self, map_coord_x, map_coord_y): x = len(map_coord_x) Loading