Weights added to final heatmap + different types of batch GD created (33510b6e) · Commits · Jan Kovář / DNN_playground

Data.py

+28 −15

Original line number	Diff line number	Diff line
		@@ -17,15 +17,22 @@ class Data:
		input_coord_dimension = 2
		output_data_dimension = 1
		data_size = 1
		training_data_size = 1
		all_data = []

		training_data_size = 1
		training_indices = []
		training_data = []
		tagged_data = []
		labelled_data = []

		test_data_size = 1
		test_indices = []
		test_data = []


		def __init__(self, data_size, training_data_size):
		self.data_size = data_size
		self.training_data_size = training_data_size
		self.test_data_size = data_size - training_data_size

		def norm(self, x, y):
		    """Return the Euclidean norm sqrt(x**2 + y**2).

		    Works element-wise when ``x`` and ``y`` are NumPy arrays.
		    """
		    return np.sqrt(x**2 + y**2)
		@@ -66,14 +73,20 @@ class Data:



		def splitDataToTrainingTest(self):
		    """Randomly partition ``all_data`` into a training part and a test part.

		    Fills ``training_indices`` / ``training_data`` with
		    ``training_data_size`` rows drawn without replacement and
		    ``test_indices`` / ``test_data`` with every remaining row.
		    Prints ``OK`` when the test part has the expected ``test_data_size``.
		    """
		    # Training rows are drawn without replacement and kept as a plain list.
		    self.training_indices = random.sample(range(self.data_size), self.training_data_size)
		    self.training_data = self.all_data[self.training_indices, :]

		    # Every index that was not drawn for training belongs to the test set.
		    self.test_indices = np.delete(np.arange(self.data_size), self.training_indices, axis=0)
		    self.test_data = self.all_data[self.test_indices, :]

		    if len(self.test_data) == self.test_data_size:
		        print("OK")

		def chooseTrainingData(self):
		    """Pre-rename entry point, kept so existing callers still work."""
		    self.splitDataToTrainingTest()

		def initializeDataSet(self, type):
		    """Build the dataset of the requested type and split it for training.

		    Runs, in order, ``createAllData(type)``, ``addAdditionalInputs()`` and
		    ``splitDataToTrainingTest()``.

		    Args:
		        type: Dataset selector forwarded unchanged to ``createAllData``.
		    """
		    self.createAllData(type)
		    self.addAdditionalInputs()
		    # The split is performed once, through the renamed method.
		    self.splitDataToTrainingTest()

		def createLinear(self):
		self.all_data = 2*np.random.rand(self.data_size,self.input_coord_dimension)-np.ones((self.data_size,self.input_coord_dimension))
		@@ -82,7 +95,7 @@ class Data:
		self.all_data[i][1] = self.all_data[i][1]
		else:
		self.all_data[i][1] = self.all_data[i][1]
		self.tagged_data = 2*(self.all_data[:,1] < 0)-1
		self.labelled_data = 2*(self.all_data[:,1] < 0)-1

		def createElipse(self):
		self.all_data = 2*np.random.rand(self.data_size,self.input_coord_dimension)-np.ones((self.data_size,self.input_coord_dimension))
		@@ -90,7 +103,7 @@ class Data:
		if self.norm((self.all_data[i][0])/1, (self.all_data[i][1])/1) < 0.7:
		self.all_data[i][0] = 0.5*self.all_data[i][0]
		self.all_data[i][1] = 0.5*self.all_data[i][1]
		self.tagged_data = 2*(self.norm((self.all_data[:,0]/1), (self.all_data[:,1])/1) < 0.7)-1
		self.labelled_data = 2*(self.norm((self.all_data[:,0]/1), (self.all_data[:,1])/1) < 0.7)-1

		def createSinus(self):
		self.all_data = 2*np.random.rand(self.data_size,self.input_coord_dimension)-np.ones((self.data_size,self.input_coord_dimension))
		@@ -99,7 +112,7 @@ class Data:
		self.all_data[i][1] -= 0.0
		else:
		self.all_data[i][1] += 0.0
		self.tagged_data = 2*(self.all_data[:,1] < 0.5*np.sin(4*self.all_data[:,0]))-1
		self.labelled_data = 2*(self.all_data[:,1] < 0.5*np.sin(4*self.all_data[:,0]))-1

		def createHalfElipse(self):
		self.all_data = 2*np.random.rand(self.data_size,self.input_coord_dimension)-np.ones((self.data_size,self.input_coord_dimension))
		@@ -113,7 +126,7 @@ class Data:
		self.all_data[i][1] += 0.2
		else:
		self.all_data[i][1] = np.maximum(-1, self.all_data[i][1]-0.1)
		self.tagged_data = 2*(self.norm(self.all_data[:,0], self.all_data[:,1]+1.2) < 1.2)-1
		self.labelled_data = 2*(self.norm(self.all_data[:,0], self.all_data[:,1]+1.2) < 1.2)-1

		def createSpiral(self):
		# Define the center of the spiral, as well as the starting angle and the distance between the lines
		@@ -125,7 +138,7 @@ class Data:

		# Create an empty list to store the points of the spiral
		self.all_data = np.zeros((self.data_size,2))
		self.tagged_data = np.zeros(self.data_size)
		self.labelled_data = np.zeros(self.data_size)

		# Create a loop to generate the points of the spiral
		for i in range(math.floor(self.data_size/2)):
		@@ -138,9 +151,9 @@ class Data:

		# Add the point to the list of points
		self.all_data[2*i] = [x,y]
		self.tagged_data[2*i] = -1
		self.labelled_data[2*i] = -1
		self.all_data[2*i+1] = [x_2,y_2]
		self.tagged_data[2*i+1] = 1
		self.labelled_data[2*i+1] = 1

		# Increase the angle and distance for the next iteration
		angle += 6*math.pi/self.data_size
		@@ -148,7 +161,7 @@ class Data:

		def createTwoElipses(self):
		self.all_data = 2*np.random.rand(self.data_size,self.input_coord_dimension)-np.ones((self.data_size,self.input_coord_dimension))
		self.tagged_data = np.zeros(self.data_size)
		self.labelled_data = np.zeros(self.data_size)

		center_x1 = 0.4
		center_y1 = 0.4
		@@ -161,10 +174,10 @@ class Data:
		if self.norm((self.all_data[i][0]-center_x1)/1, (self.all_data[i][1]-center_y1)/1) < range1:
		self.all_data[i][0] = 0.8*(self.all_data[i][0]-center_x1) + center_x1
		self.all_data[i][1] = 0.8*(self.all_data[i][1]-center_y1) + center_y1
		self.tagged_data[i] = 1
		self.labelled_data[i] = 1
		if self.norm((self.all_data[i][0]-center_x2)/1, (self.all_data[i][1]-center_y2)/1) < range2:
		self.all_data[i][0] = 0.8*(self.all_data[i][0]-center_x2) + center_x2
		self.all_data[i][1] = 0.8*(self.all_data[i][1]-center_y2) + center_y2
		self.tagged_data[i] = 1
		self.tagged_data = 2*self.tagged_data-1
		self.labelled_data[i] = 1
		self.labelled_data = 2*self.labelled_data-1

LearningController.py

+148 −73

Original line number	Diff line number	Diff line
		@@ -4,32 +4,94 @@ from Data import Data
		import matplotlib as mpl
		import matplotlib.pyplot as plt
		import sys
		import os
		import random
		from matplotlib.patches import ConnectionPatch


		class TrainingController:
		    """Drives the training of a neural network on a ``Data`` set.

		    The controller owns the hyper-parameters (learning rate, L2
		    regularisation, batch size, epoch limit), the most recent loss values
		    with their baselines, and the per-epoch loss histories. The network is
		    only used through ``setLearningParameters``, ``resetBeforeEpoch``,
		    ``activation``, ``calculateErrors``, ``updateGradient`` and
		    ``updateWeights``.
		    """

		    folder_path = "model/"
		    learning_rate = 0.1
		    l2_regularization = 0.0
		    batch_size = 30
		    max_epochs = 1000
		    train_loss = 1.0
		    train_loss_baseline = 1.0
		    test_loss = 1.0
		    test_loss_baseline = 1.0
		    total_loss = 1.0
		    total_loss_baseline = 1.0

		    def __init__(self, network):
		        """Attach ``network`` and hand it the controller's learning parameters."""
		        self.network = network
		        self.network.setLearningParameters(self.learning_rate, self.l2_regularization)
		        # Histories are created per instance; class-level lists would be
		        # shared by every controller.
		        self.train_loss_history = []
		        self.test_loss_history = []

		    def trainOnBatch(self, batch, labels):
		        """Accumulate the gradient contribution of every sample in ``batch``.

		        Args:
		            batch: Sequence of input samples.
		            labels: Expected outputs, aligned with ``batch``.
		        """
		        batch_length = len(batch)
		        for sample, label in zip(batch, labels):
		            self.network.activation(sample)
		            self.network.calculateErrors(label)
		            self.network.updateGradient(batch_length)

		    def _iterShuffledBatches(self, dataset):
		        """Yield ``(batch, labels)`` pairs covering the training rows in random order.

		        Batches hold ``batch_size`` rows; the last one may be shorter and an
		        empty batch is never produced.
		        """
		        # Shuffle the actual training indices so that test rows are never
		        # used and no training row is left out.
		        shuffled_indexes = np.random.permutation(np.asarray(dataset.training_indices))
		        for start in range(0, len(shuffled_indexes), self.batch_size):
		            batch_indexes = shuffled_indexes[start:start + self.batch_size]
		            yield dataset.all_data[batch_indexes, :], dataset.labelled_data[batch_indexes]

		    def trainOneEpochBGD(self, dataset: "Data"):
		        """Run one epoch of batch gradient descent: a single weight update.

		        The gradient is reset once, accumulated over all training rows and
		        applied once at the end of the epoch.
		        """
		        self.network.resetBeforeEpoch()
		        # NOTE(review): every batch passes its own length to updateGradient,
		        # so the accumulated gradient is a sum of per-batch contributions
		        # rather than one average over the epoch - confirm this scaling.
		        for batch, labels in self._iterShuffledBatches(dataset):
		            self.trainOnBatch(batch, labels)
		        self.network.updateWeights()

		    def trainOneEpochMiniBGD(self, dataset: "Data"):
		        """Run one epoch of mini-batch gradient descent: one update per batch."""
		        for batch, labels in self._iterShuffledBatches(dataset):
		            self.network.resetBeforeEpoch()
		            self.trainOnBatch(batch, labels)
		            self.network.updateWeights()

		    def trainOneEpochSGD(self, dataset: "Data", multiple_passes):
		        """Train on one randomly drawn batch, repeated ``multiple_passes`` times.

		        Args:
		            dataset: Provides ``training_indices``, ``all_data`` and
		                ``labelled_data``.
		            multiple_passes: Number of reset/accumulate/update cycles run
		                on the same batch.
		        """
		        population = list(dataset.training_indices)
		        # Never request more rows than exist; random.sample would raise.
		        sample_size = min(self.batch_size, len(population))
		        batch_indexes = random.sample(population, sample_size)
		        batch = dataset.all_data[batch_indexes, :]
		        labels = dataset.labelled_data[batch_indexes]

		        for _ in range(multiple_passes):
		            self.network.resetBeforeEpoch()
		            self.trainOnBatch(batch, labels)
		            self.network.updateWeights()

		    def trainOnDataset(self, dataset, number_of_epochs, epoch_trainer=None):
		        """Train for ``number_of_epochs`` epochs.

		        Args:
		            dataset: Dataset forwarded to the per-epoch routine.
		            number_of_epochs: How many epochs to run.
		            epoch_trainer: Optional callable taking ``dataset`` that trains
		                one epoch; defaults to ``self.trainOneEpochMiniBGD``.
		        """
		        if epoch_trainer is None:
		            epoch_trainer = self.trainOneEpochMiniBGD
		        for _ in range(number_of_epochs):
		            epoch_trainer(dataset)
		            # TODO: hook in the plotting / graphics updates here.

		    def saveTrainedModel(self):
		        """Placeholder; nothing is saved yet."""
		        # TODO: save plots, history in txt, weights in txt and numpy for network loading
		        pass


		# Pre-rename class name, kept so existing callers still work.
		LearningController = TrainingController


		data_size = 1000
		@@ -40,87 +102,75 @@ if(data.initializeDataSet("spiral") == False):
		print("Dataset not defined")
		sys.exit(1)

		structure = [data.input_data_dimension,8,4,4,2,data.output_data_dimension]
		structure = [data.input_data_dimension,8,8,8,data.output_data_dimension]
		network = NeuralNetwork(structure)

		learning_controller = LearningController(network)

		plt.ion()
		fig, (tag,cost) = plt.subplots(1,2, figsize=(10, 5))
		training_controller = TrainingController(network)

		os.makedirs(training_controller.folder_path, exist_ok=True)

		costFunction_training = []
		costFunction_testing = []
		plt.ion()
		fig, (state,cost) = plt.subplots(1,2, figsize=(10, 5))

		test_points_x = np.linspace(-1,1,101)
		test_points_y = np.linspace(1,-1,101)

		tagged_data_A = np.zeros((len(data.all_data), data.input_data_dimension))
		tagged_data_B = np.zeros((len(data.all_data), data.input_data_dimension))
		labelled_data_A = np.zeros((data_size, data.input_data_dimension))
		labelled_data_B = np.zeros((data_size, data.input_data_dimension))

		for k in range(data_size):
		if data.tagged_data[k] > 0:
		tagged_data_A[k] = data.all_data[k]
		if data.labelled_data[k] > 0:
		labelled_data_A[k] = data.all_data[k]
		else:
		tagged_data_B[k] = data.all_data[k]
		labelled_data_B[k] = data.all_data[k]

		tagged_data_A = tagged_data_A[~np.all(tagged_data_A == 0, axis=1)]
		tagged_data_B = tagged_data_B[~np.all(tagged_data_B == 0, axis=1)]
		labelled_data_A = labelled_data_A[~np.all(labelled_data_A == 0, axis=1)]
		labelled_data_B = labelled_data_B[~np.all(labelled_data_B == 0, axis=1)]

		epoch = 0
		epsilon = 0.01
		epsilon = 0.05
		costFunction = 1.0
		costFunction_training_value = 1.0

		while epoch < learning_controller.max_epochs and costFunction_training_value >= epsilon:
		while epoch < training_controller.max_epochs and training_controller.train_loss >= epsilon:
		print("Epoch "+str(epoch)+":")

		batch_indexes = random.sample(range(data.training_data_size), learning_controller.batch_size)#np.random.randint(0, training_data_size, batch_size)
		batch_indexes = random.sample(data.training_indices, learning_controller.batch_size)
		batch = data.all_data[batch_indexes,:]

		tags = data.tagged_data[batch_indexes]

		learning_controller.trainOnBatch(batch, tags, 10)

		network.updateCostFunction(data.all_data[data.training_indices], data.tagged_data[data.training_indices])

		if epoch == 0:
		cost_function_training_baseline = network.cost_function

		costFunction_training.append((epoch, network.cost_function/cost_function_training_baseline))

		training_value = network.cost_function
		training_controller.trainOneEpochSGD(data,10)
		#training_controller.trainOneEpochMiniBGD(data)

		network.updateCostFunction(np.delete(data.all_data, data.training_indices, axis=0), np.delete(data.tagged_data, data.training_indices))

		training_controller.train_loss = training_controller.network.updateLossFunction(data.all_data[data.training_indices], data.labelled_data[data.training_indices])
		training_controller.test_loss = training_controller.network.updateLossFunction(data.all_data[data.test_indices], data.labelled_data[data.test_indices])
		training_controller.total_loss = training_controller.train_loss+training_controller.test_loss
		if epoch == 0:
		cost_function_testing_baseline = network.cost_function - cost_function_training_baseline
		training_controller.train_loss_baseline = training_controller.train_loss
		training_controller.test_loss_baseline = training_controller.test_loss
		training_controller.total_loss_baseline = training_controller.train_loss_baseline + training_controller.test_loss_baseline

		cost_function_baseline = cost_function_training_baseline + cost_function_testing_baseline
		training_controller.train_loss /= training_controller.train_loss_baseline
		training_controller.test_loss /= training_controller.test_loss_baseline
		training_controller.total_loss /= training_controller.total_loss_baseline

		costFunction_testing.append((epoch, (network.cost_function - training_value)/cost_function_testing_baseline))
		training_controller.train_loss_history.append((epoch, training_controller.train_loss))
		training_controller.test_loss_history.append((epoch, (training_controller.test_loss)))

		print("Cost function: "+str(network.cost_function/cost_function_baseline))
		print("Training cost function: "+str(training_value/cost_function_training_baseline))
		print("Cost function: "+str(training_controller.total_loss))
		print("Training cost function: "+str(training_controller.train_loss))

		costFunction = network.cost_function/cost_function_baseline
		costFunction_training_value = training_value/cost_function_training_baseline

		cost.plot(zip(costFunction_training), 'k-', label='Training data')
		cost.plot(zip(costFunction_testing), 'b-', label='Testing data')
		cost.plot(zip(training_controller.train_loss_history), 'k-', label='Training data')
		cost.plot(zip(training_controller.test_loss_history), 'b-', label='Testing data')

		if epoch % 20 == 0:
		colormap = network.computeHeatMap(test_points_x, test_points_y)
		colormap = training_controller.network.computeHeatMap(test_points_x, test_points_y)

		#heatmap = proc.imshow(colormap, cmap='RdYlGn', extent=([-1, 1, -1, 1]), interpolation='bilinear', vmin=-1, vmax=1)
		tag.imshow(colormap, cmap='RdYlGn', extent=([-1, 1, -1, 1]), interpolation='bilinear', vmin=-1, vmax=1)
		tag.plot(tagged_data_A[:,0], tagged_data_A[:,1], 'go', label="Class A")
		tag.plot(tagged_data_B[:,0], tagged_data_B[:,1], 'ro', label="Class B")
		tag.plot(data.training_data[:,0], data.training_data[:,1], 'k.', label="Training data")
		state.imshow(colormap, cmap='RdYlGn', extent=([-1, 1, -1, 1]), interpolation='bilinear', vmin=-1, vmax=1)
		state.plot(labelled_data_A[:,0], labelled_data_A[:,1], 'go', label="Class A")
		state.plot(labelled_data_B[:,0], labelled_data_B[:,1], 'ro', label="Class B")
		state.plot(data.training_data[:,0], data.training_data[:,1], 'k.', label="Training data")

		if(epoch == 0):
		tag.legend(loc='lower center', bbox_to_anchor=(0.5, 1.0), ncols=3)
		state.legend(loc='lower center', bbox_to_anchor=(0.5, 1.0), ncols=3)
		#fig.colorbar(heatmap, orientation="horizontal")
		cost.legend(loc='lower center', bbox_to_anchor=(0.5, 1.0), ncols=2)

		@@ -129,32 +179,57 @@ while epoch < learning_controller.max_epochs and costFunction_training_value >=

		epoch += 1

		colormap = network.computeHeatMap(test_points_x, test_points_y)
		colormap = training_controller.network.computeHeatMap(test_points_x, test_points_y)

		#heatmap = proc.imshow(colormap, cmap='RdYlGn', extent=([-1, 1, -1, 1]), interpolation='bilinear', vmin=-1, vmax=1)
		tag.imshow(colormap, cmap='RdYlGn', extent=([-1, 1, -1, 1]), interpolation='bilinear', vmin=-1, vmax=1)
		tag.plot(tagged_data_A[:,0], tagged_data_A[:,1], 'go', label="Class A")
		tag.plot(tagged_data_B[:,0], tagged_data_B[:,1], 'ro', label="Class B")
		tag.plot(data.training_data[:,0], data.training_data[:,1], 'k.', label="Training data")
		state.imshow(colormap, cmap='RdYlGn', extent=([-1, 1, -1, 1]), interpolation='bilinear', vmin=-1, vmax=1)
		state.plot(labelled_data_A[:,0], labelled_data_A[:,1], 'go', label="Class A")
		state.plot(labelled_data_B[:,0], labelled_data_B[:,1], 'ro', label="Class B")
		state.plot(data.training_data[:,0], data.training_data[:,1], 'k.', label="Training data")

		numberOfNeurons = np.sum(structure)
		maxNeuronsInLayer = np.max(structure)

		networkMap = network.computeWholeNeuralNetworkHeatMap(test_points_x, test_points_y, len(structure), maxNeuronsInLayer)
		networkMap = training_controller.network.computeWholeNeuralNetworkHeatMap(test_points_x, test_points_y, len(structure), maxNeuronsInLayer)

		#fig2, subfigs = plt.subplots(maxNeuronsInLayer, len(structure), figsize=(4*maxNeuronsInLayer, 4*maxNeuronsInLayer))

		subfigs = []
		fig2 = plt.figure(figsize=(4*len(structure), 4*maxNeuronsInLayer))

		max_weights_in_layer = []
		for i in range(len(structure)):
		max_weights_in_layer.append(np.max(training_controller.network.layer[i].weights))
		max_weight = np.max(np.array(max_weights_in_layer))

		for i in range(len(structure)):
		for j in range(structure[i]):
		subfig = fig2.add_subplot(maxNeuronsInLayer, len(structure), i+len(structure)*j+1)
		subfig.imshow(networkMap[i+len(structure)*j], cmap='RdYlGn', extent=([-1, 1, -1, 1]), vmin=-1, vmax=1)
		subfigs.append(subfig)

		index_of_first_neuron_in_previous_layer = int(np.sum(structure[:i-1]))
		if(i > 0):
		xy1=[1,0]
		xy2=[-1,0]

		for index in range(structure[i-1]):
		weight = training_controller.network.layer[i].weights[j][index]/max_weight
		if weight >= 0:
		con = ConnectionPatch(xyA=xy2, xyB=xy1, coordsA="data", coordsB="data", axesA=subfig, axesB=subfigs[index_of_first_neuron_in_previous_layer+index], color="red", linestyle='dashed', lw=3.0*weight)
		else:
		con = ConnectionPatch(xyA=xy2, xyB=xy1, coordsA="data", coordsB="data", axesA=subfig, axesB=subfigs[index_of_first_neuron_in_previous_layer+index], color="green", linestyle='dashed', lw=-3.0*weight)

		subfig.add_artist(con)


		# Render the per-neuron heat-map figure before showing and saving it.
		fig2.canvas.draw()
		fig2.canvas.flush_events()

		# ``block`` must be a real boolean: the string 'false' is truthy and
		# would make show() block until the windows are closed.
		plt.show(block=False)

		training_controller.network.printWeights()
		training_controller.network.printBiases()

		# Store both figures in the controller's output folder.
		fig2.savefig(training_controller.folder_path+"heatmap", dpi=500)
		fig.savefig(training_controller.folder_path+"final_state", dpi=500)
		No newline at end of file

NeuralNetwork.py

+10 −7

Original line number	Diff line number	Diff line
		@@ -10,7 +10,7 @@ class NeuralNetwork:
		gradient = []
		learning_rate = 0.01
		l2_regularization = 0.0
		cost_function = 0.0
		loss_function = 0.0

		def __init__(self, structure):
		#Number of layers
		@@ -47,16 +47,16 @@ class NeuralNetwork:
		for i in range(self.depth-2,0,-1):
		self.errors[i] = np.multiply(np.matmul(np.transpose(self.layer[i+1].weights),self.errors[i+1]), self.derivativeOfActivationFunction(self.layer[i].neurons_notActivated))

		def computeLossFunction(self, output):
		    """Add the squared-error loss of the current output layer to ``loss_function``.

		    The contribution is ``0.5 * ||neurons - output||^2`` where ``neurons``
		    is the state of the last layer, so ``activation`` has to be run for the
		    sample beforehand.

		    Args:
		        output: Expected output for the sample that was last activated.
		    """
		    residual = self.layer[self.depth - 1].neurons - output
		    self.loss_function += 0.5 * np.dot(residual, residual)

		def updateGradient(self, batch_size):
		    """Accumulate the current sample's gradient, scaled by ``1 / batch_size``.

		    For every layer from the last down to layer 1, the outer product of
		    the layer's errors with the previous layer's neurons is added to
		    ``gradient`` and the errors themselves to ``gradient_bias``.

		    Args:
		        batch_size: Number of samples the gradient is averaged over.
		    """
		    scale = 1.0 / batch_size
		    for n in range(self.depth - 1, 0, -1):
		        self.layer[n].gradient += scale * np.outer(self.errors[n], self.layer[n - 1].neurons)
		        self.layer[n].gradient_bias += scale * self.errors[n]

		def resetBeforeBatchLearning(self):
		self.cost_function = 0.0
		def resetBeforeEpoch(self):
		self.loss_function = 0.0
		for n in range(self.depth):
		self.layer[n].gradient = np.zeros((self.layer[n].dimension, self.layer[n].prev_layer_dimension))
		self.layer[n].gradient_bias = np.zeros(self.layer[n].dimension)
		@@ -82,10 +82,13 @@ class NeuralNetwork:
		self.activation(input)
		return self.layer[self.depth-1].neurons

		def updateLossFunction(self, input, output):
		    """Return the loss summed over the given samples.

		    Each sample is run through ``activation`` and its loss added with
		    ``computeLossFunction``. ``loss_function`` is left at 0.0 afterwards.

		    Args:
		        input: Sequence of input samples.
		        output: Expected outputs, aligned with ``input``.

		    Returns:
		        The accumulated loss over all samples.
		    """
		    # Start from zero so a stale accumulator cannot leak into the result.
		    self.loss_function = 0.0
		    for sample, target in zip(input, output):
		        self.activation(sample)
		        self.computeLossFunction(target)
		    value = self.loss_function
		    self.loss_function = 0.0
		    return value

		def computeHeatMap(self, map_coord_x, map_coord_y):
		x = len(map_coord_x)

model/final_state.png

0 → 100644

+976 KiB

Loading image diff...

model/heatmap.png

0 → 100644

+3.53 MiB

Loading image diff...