"""Dataset Features Related Utils"""from .normalize import normalize
from .generate_polynomials import generate_polynomials
from .generate_sinusoids import generate_sinusoids
from .prepare_for_training import prepare_for_training
"""Add polynomial features to the features set"""

import numpy as np

from .normalize import normalize


def generate_polynomials(dataset, polynomial_degree, normalize_data=False):
    """Extend a data set with polynomial features up to a given degree.

    The columns of ``dataset`` are split into two halves and, for every degree
    ``i`` in ``1..polynomial_degree`` and every ``j`` in ``0..i``, the
    element-wise product ``half_1 ** (i - j) * half_2 ** j`` is appended
    (x1, x2, x1^2, x1*x2, x2^2, ...).

    :param dataset: 2-D array that polynomials are generated for.
    :param polynomial_degree: the max power of the new features.
    :param normalize_data: when true the generated features are passed
        through ``normalize`` before being returned.
    :return: 2-D array holding only the generated polynomial features.
    :raises ValueError: if the two halves have a different number of rows or
        if neither half has any column.
    """
    # Split features on two halves.
    features_split = np.array_split(dataset, 2, axis=1)
    dataset_1 = features_split[0]
    dataset_2 = features_split[1]

    # Extract sets parameters.
    num_examples_1, num_features_1 = dataset_1.shape
    num_examples_2, num_features_2 = dataset_2.shape

    # Check if two sets have equal amount of rows.
    if num_examples_1 != num_examples_2:
        raise ValueError('Can not generate polynomials for two sets with different number of rows')

    # Check if at least one set has features.
    if num_features_1 == 0 and num_features_2 == 0:
        raise ValueError('Can not generate polynomials for two sets with no columns')

    # Replace empty set with non-empty one.
    if num_features_1 == 0:
        dataset_1 = dataset_2
    elif num_features_2 == 0:
        dataset_2 = dataset_1

    # Both halves must have the same number of columns to be multiplied
    # element-wise. The width is read from the arrays themselves because the
    # replacement above may have changed them, and it is compared against the
    # other half's column count (not its row count).
    num_features = min(dataset_1.shape[1], dataset_2.shape[1])
    dataset_1 = dataset_1[:, :num_features]
    dataset_2 = dataset_2[:, :num_features]

    # Collect every polynomial term; the empty seed keeps the result a float
    # array of shape (rows, 0) when no term is generated.
    terms = [np.empty((num_examples_1, 0))]
    for i in range(1, polynomial_degree + 1):
        for j in range(i + 1):
            terms.append((dataset_1 ** (i - j)) * (dataset_2 ** j))
    polynomials = np.concatenate(terms, axis=1)

    # Normalize polynomials if needed.
    if normalize_data:
        polynomials = normalize(polynomials)[0]

    # Return generated polynomial features.
    return polynomials
"""Add sinusoid features to the features set"""

import numpy as np


def generate_sinusoids(dataset, sinusoid_degree):
    """Build sinusoid features for a data set.

    For every multiplier ``k`` in ``1..sinusoid_degree`` the block
    ``sin(k * dataset)`` is produced, and all blocks are placed side by side.

    :param dataset: data set.
    :param sinusoid_degree: largest multiplier applied inside the sine.
    :return: array with the generated features only; it has zero columns when
        ``sinusoid_degree`` is smaller than 1.
    """
    row_count = dataset.shape[0]

    # The empty float block keeps the (rows, 0) result for a degree below 1.
    blocks = [np.empty((row_count, 0))]
    blocks.extend(
        np.sin(multiplier * dataset)
        for multiplier in range(1, sinusoid_degree + 1)
    )

    return np.concatenate(blocks, axis=1)
"""Normalize features"""

import numpy as np


def normalize(features):
    """Scale every feature column to zero mean and unit deviation.

    :param features: set of features, one example per row.
    :return: tuple ``(normalized_features, column_means, column_deviations)``;
        the input array itself is left untouched. Deviations equal to zero
        are reported as 1.
    """
    # Column statistics are taken from the untouched input.
    column_mean = np.mean(features, axis=0)
    column_deviation = np.std(features, axis=0)

    # Work on a float copy so the caller's array is never modified.
    scaled = np.array(features, dtype=float)

    # The mean is only removed when there is more than one example.
    if features.shape[0] > 1:
        scaled -= column_mean

    # Constant columns have zero deviation; divide those by 1 instead.
    column_deviation[column_deviation == 0] = 1
    scaled /= column_deviation

    return scaled, column_mean, column_deviation
"""Prepares the dataset for training"""

import numpy as np

from .normalize import normalize
from .generate_sinusoids import generate_sinusoids
from .generate_polynomials import generate_polynomials


def prepare_for_training(data, polynomial_degree=0, sinusoid_degree=0, normalize_data=True):
    """Build the training matrix from a raw data set.

    :param data: 2-D array of examples; it is copied, never modified.
    :param polynomial_degree: when positive, polynomial features of that
        degree are appended.
    :param sinusoid_degree: when positive, sinusoid features of that degree
        are appended.
    :param normalize_data: whether the data is normalized first; the flag is
        also forwarded to ``generate_polynomials``.
    :return: tuple ``(data_processed, features_mean, features_deviation)``.
        ``data_processed`` starts with a column of ones; mean and deviation
        are both 0 when normalization is switched off.
    """
    example_count = data.shape[0]

    # Work on a copy so the original data is never modified.
    base_features = np.copy(data)

    features_mean = 0
    features_deviation = 0
    if normalize_data:
        base_features, features_mean, features_deviation = normalize(base_features)

    # Column order: ones, base features, sinusoids, polynomials.
    # The extra features are derived from the (possibly normalized) base features.
    column_blocks = [np.ones((example_count, 1)), base_features]

    if sinusoid_degree > 0:
        column_blocks.append(generate_sinusoids(base_features, sinusoid_degree))

    if polynomial_degree > 0:
        column_blocks.append(
            generate_polynomials(base_features, polynomial_degree, normalize_data)
        )

    return np.hstack(column_blocks), features_mean, features_deviation
def __init__(self, data, labels, layers, normalize_data=False):
    """Store the training set and create the initial weights.

    :param data: raw feature matrix; it is passed through
        ``prepare_for_training`` and only the processed matrix is kept.
    :param labels: labels, stored as given.
    :param layers: layer sizes, e.g. 28*28*1 = 784 inputs, 25 hidden units
        (the hidden size can be changed) and 10 outputs.
    :param normalize_data: forwarded to ``prepare_for_training``.
    """
    prepared = prepare_for_training(data, normalize_data=normalize_data)

    self.data = prepared[0]
    self.labels = labels
    self.layers = layers
    self.normalize_data = normalize_data
    self.thetas = MultilayerPerceptron.thetas_init(layers)
@staticmethoddef sigmoid(z):"""Sigmoid 函数"""return 1.0 / (1.0 + np.exp(-np.asarray(z)))@staticmethoddef sigmoid_gradient(z):"""计算Sigmoid 函数的梯度"""g = np.zeros_like(z)# ====================== 你的代码 ======================# 计算Sigmoid 函数的梯度g的值dz = MultilayerPerceptron.sigmoid(z)g = dz * (1 - dz)# =======================================================return g
'''将矩阵拉长变成1*n'''@staticmethoddef thetas_unroll(thetas):num_thetas = len(thetas)unrolled_theta = np.array([])for num_thetas_index in range(num_thetas):unrolled_theta = np.hstack((unrolled_theta, thetas[num_thetas_index].flatten()))return unrolled_theta'''将1*n变成矩阵'''@staticmethoddef thetas_roll(unrolled_thetas, layers):num_layers = len(layers)thetas = {}unrolled_shift = 0for index in range(num_layers - 1):in_count = int(layers[index])out_count = int(layers[index+1])theta_width = in_count + 1theta_height = out_counttheta_volume = theta_width * theta_heightstart_index = unrolled_shiftend_index = unrolled_shift + theta_volumelayer_theta_unrolled = unrolled_thetas[start_index: end_index]thetas[index] = layer_theta_unrolled.reshape((theta_height, theta_width))unrolled_shift += theta_volumereturn thetas
'''初始化theta'''@staticmethoddef thetas_init(layers):num_layers = len(layers)thetas = {}for layer_index in range(num_layers - 1):'''执行两次,得到两组参数矩阵:25*785 10*26'''in_count = int(layers[layer_index])out_count = int(layers[layer_index + 1])# print(type(in_count))# 这里考虑偏置项,偏置的个数和输出的结果是一致的randomTheta = np.random.rand(out_count, in_count + 1) * 0.05 # 随机初始化 值尽量小点# print(randomTheta)thetas[layer_index] = randomThetaprint(thetas[layer_index].shape)return thetas
# Cost function.
@staticmethod
def cost_function(data, labels, thetas, layers):
    """Cross-entropy cost of the network on a labelled data set.

    :param data: 2-D array, one example per row, in the form expected by
        ``feedforward_propagation``.
    :param labels: per-example class index, read from column 0 of each row.
    :param thetas: dict of weight matrices indexed by layer.
    :param layers: layer sizes; the last entry is the number of classes.
    :return: the cost averaged over the examples.
    """
    num_examples = data.shape[0]
    num_labels = layers[-1]

    # Forward pass.
    predictions = MultilayerPerceptron.feedforward_propagation(data, thetas, layers)

    # One-hot encode the labels: one row per example.
    bitwise_labels = np.zeros((num_examples, num_labels))
    bitwise_labels[np.arange(num_examples), np.asarray(labels)[:, 0]] = 1

    # Outputs of the true class should be close to 1 ...
    bit_set_cost = np.sum(np.log(predictions[bitwise_labels == 1]))
    # ... and outputs of every other class close to 0, so this term has to
    # look at the positions where the one-hot label is 0.
    bit_not_set_cost = np.sum(np.log(1 - predictions[bitwise_labels == 0]))

    return (-1 / num_examples) * (bit_set_cost + bit_not_set_cost)

# Forward propagation.
@staticmethod
def feedforward_propagation(data, thetas, layers):
    """Propagate a batch of examples through the network.

    :param data: 2-D array, one example per row; it is multiplied by
        ``thetas[0].T`` directly, so it must already include the bias column.
    :param thetas: dict of weight matrices indexed by layer.
    :param layers: layer sizes; only their count is used.
    :return: activations of the last layer without the bias column.
    """
    num_examples = data.shape[0]
    in_layer_activation = data

    for index in range(len(layers) - 1):
        out_layer_activation = MultilayerPerceptron.sigmoid(
            np.dot(in_layer_activation, thetas[index].T)
        )
        # Prepend the bias column, e.g. num_examples*25 becomes num_examples*26.
        in_layer_activation = np.hstack((np.ones((num_examples, 1)), out_layer_activation))

    # Drop the bias column of the output layer.
    return in_layer_activation[:, 1:]
# Back propagation.
@staticmethod
def back_propagation(data, labels, thetas, layers):
    """Compute the cost gradient for every weight matrix.

    Each example is propagated forward on its own, the output error is pushed
    back through the layers, and the per-example gradients are averaged.

    :param data: 2-D array, one example per row; each row is multiplied by
        ``thetas[0]`` directly, so it must already include the bias entry.
    :param labels: per-example class index, read as ``labels[i][0]``.
    :param thetas: dict of weight matrices indexed by layer.
    :param layers: layer sizes; the last entry is the number of classes.
    :return: dict with one gradient matrix per layer, each of shape
        ``(layers[i + 1], layers[i] + 1)``.
    """
    num_layers = len(layers)
    num_examples = data.shape[0]
    num_features = data.shape[1]
    num_label_types = layers[-1]

    # Gradient accumulators, e.g. 25*785 and 10*26 for a 784-25-10 network.
    deltas = {
        index: np.zeros((layers[index + 1], layers[index] + 1))
        for index in range(num_layers - 1)
    }

    for example_index in range(num_examples):
        layer_inputs = {}
        layer_activations = {}
        # The example as a column vector, e.g. 785*1.
        layer_activation = data[example_index, :].reshape((num_features, 1))
        layer_activations[0] = layer_activation

        # Forward pass, layer by layer.
        for index in range(num_layers - 1):
            # Keep the pre-activation value: the sigmoid gradient used in the
            # backward pass must be evaluated on it, not on the sigmoid output.
            layer_input = np.dot(thetas[index], layer_activation)
            layer_activation = np.vstack(
                (np.array([[1]]), MultilayerPerceptron.sigmoid(layer_input))
            )
            layer_inputs[index + 1] = layer_input
            layer_activations[index + 1] = layer_activation  # with bias row

        # Output of the network without the bias row.
        output_layer_activation = layer_activation[1:, :]

        # One-hot label of this example.
        bitwise_label = np.zeros((num_label_types, 1))
        bitwise_label[labels[example_index][0]] = 1

        # Error of the output layer, e.g. 10*1.
        delta = {num_layers - 1: output_layer_activation - bitwise_label}

        # Walk back through the hidden layers: L-1, L-2, ..., 2.
        for index in range(num_layers - 2, 0, -1):
            # A bias row is added so the shapes match theta.T @ delta.
            layer_input = np.vstack((np.array([[1]]), layer_inputs[index]))
            delta[index] = (
                np.dot(thetas[index].T, delta[index + 1])
                * MultilayerPerceptron.sigmoid_gradient(layer_input)
            )
            # Drop the bias row again.
            delta[index] = delta[index][1:, :]

        # Accumulate this example's gradient.
        for index in range(num_layers - 1):
            deltas[index] = deltas[index] + np.dot(delta[index + 1], layer_activations[index].T)

    for index in range(num_layers - 1):
        deltas[index] /= num_examples

    return deltas
# Gradient of the cost for a flat parameter vector.
@staticmethod
def gradient_step(data, labels, optimized_theta, layers):
    """Return the flattened gradient for the given flattened weights.

    :param data: feature matrix passed on to ``back_propagation``.
    :param labels: labels passed on to ``back_propagation``.
    :param optimized_theta: flat weight vector.
    :param layers: layer sizes.
    :return: flat gradient vector in the layout of ``thetas_unroll``.
    """
    theta = MultilayerPerceptron.thetas_roll(optimized_theta, layers)
    thetas_rolled_gradients = MultilayerPerceptron.back_propagation(data, labels, theta, layers)
    return MultilayerPerceptron.thetas_unroll(thetas_rolled_gradients)

# Gradient descent.
@staticmethod
def gradient_descent(data, labels, unrolled_theta, layers, max_iter, alpha):
    """Run ``max_iter`` gradient-descent updates with step size ``alpha``.

    :param data: feature matrix.
    :param labels: labels.
    :param unrolled_theta: flat start vector; it is not modified.
    :param layers: layer sizes.
    :param max_iter: number of updates.
    :param alpha: learning rate.
    :return: tuple ``(optimized_theta, cost_history)``; the history holds the
        cost measured before each update.
    """
    # Work on a private float copy: the in-place update below would otherwise
    # overwrite the caller's array.
    optimized_theta = np.array(unrolled_theta, dtype=float)
    cost_history = []

    for _ in range(max_iter):
        # The cost is always measured with the current weights.
        cost = MultilayerPerceptron.cost_function(
            data,
            labels,
            MultilayerPerceptron.thetas_roll(optimized_theta, layers),
            layers,
        )
        cost_history.append(cost)

        theta_gradient = MultilayerPerceptron.gradient_step(data, labels, optimized_theta, layers)
        optimized_theta -= alpha * theta_gradient

    return optimized_theta, cost_history
# Training entry point.
def train(self, max_iter = 1000, alpha = 0.1):
    """Fit the network with gradient descent and store the learned weights.

    :param max_iter: number of gradient-descent iterations.
    :param alpha: learning rate.
    :return: tuple ``(thetas, cost_history)``; ``thetas`` is also stored on
        the instance.
    """
    start_vector = MultilayerPerceptron.thetas_unroll(self.thetas)
    result = MultilayerPerceptron.gradient_descent(
        self.data, self.labels, start_vector, self.layers, max_iter, alpha
    )
    final_vector, cost_history = result

    self.thetas = MultilayerPerceptron.thetas_roll(final_vector, self.layers)
    return self.thetas, cost_history
MNIST是知名数字数据集,大家可以百度搜索资源,这里使用的是csv文件进行识别。
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.image as mping
import math

from ANN.MultilayerPerceptron import MultilayerPerceptron

# Load the MNIST training and test sets from CSV.
data = pd.read_csv('data/mnist_csv/mnist_train.csv')
data2 = pd.read_csv('data/mnist_csv/mnist_test.csv')

# Optional preview of the first digits (disabled):
# numbers_to_display = 25  # show 25 images at once
# num_cell = math.ceil(math.sqrt(numbers_to_display))
# plt.figure(figsize=(10, 10))
# for index in range(numbers_to_display):
#     digit = data[index: index+1].values
#     # print(digit.shape)
#     digit_label = digit[0][0]
#     digit_pixels = digit[0][1:]
#     img_size = int(math.sqrt(digit_pixels.shape[0]))
#     frame = digit_pixels.reshape((img_size, img_size))  # pixel vector -> matrix
#     plt.subplot(num_cell, num_cell, index + 1)
#     plt.imshow(frame, cmap='Greys')
#     plt.title(digit_label)
# plt.subplots_adjust(wspace=0.5, hspace=0.5)  # spacing between the sub-plots
# plt.show()

# Use a random 10% sample of each set.
train_data = data.sample(frac=0.1).values
test_data = data2.sample(frac=0.1).values

# Column 0 is the label, the remaining columns are the pixels.
x_train, y_train = train_data[:, 1:], train_data[:, [0]]
x_test, y_test = test_data[:, 1:], test_data[:, [0]]

layers = [784, 25, 10]
normalize_data = True
max_iter = 300
alpha = 0.1

multilayer_perceptron = MultilayerPerceptron(x_train, y_train, layers, normalize_data)
thetas, costs = multilayer_perceptron.train(max_iter, alpha)

# Cost per gradient-descent step.
plt.plot(range(len(costs)), costs)
plt.xlabel('梯度下降step')
plt.ylabel('cost')
plt.show()

y_train_predictions = multilayer_perceptron.predict(x_train)
y_test_predictions = multilayer_perceptron.predict(x_test)

# Accuracy in percent.
train_hits = np.sum(y_train_predictions == y_train)
test_hits = np.sum(y_test_predictions == y_test)
train_p = train_hits/y_train.shape[0] * 100
test_p = test_hits/y_test.shape[0] * 100
print("训练准确率:", train_p)
print("测试准确率:", test_p)

# Output of one run:
# 训练准确率: 73.8
# 测试准确率: 74.2
数据集在课本上给出的网站上,但是我们先对数据进行处理,将图片转化为合适的像素矩阵,标签也要转化为适合处理的矩阵。
import numpy as np
import os
from PIL import Image
import matplotlib.pyplot as plt
from ANN.MultilayerPerceptron import MultilayerPerceptron


def imgval(example):
    """Convert an image into a flat list of scaled pixel values.

    The pixels are read with ``example.getpixel((x, y))``, with ``x`` running
    over ``example.width`` in the outer loop and ``y`` over
    ``example.height`` in the inner loop; every value is divided by 100.

    :param example: image object providing ``width``, ``height`` and
        ``getpixel``.
    :return: list of scaled pixel values.
    """
    return [
        example.getpixel((x, y)) / 100
        for x in range(example.width)
        for y in range(example.height)
    ]


# Helper that reads the images from disk.
def readimg(path):
    """Open every labelled image below ``path``.

    The label is taken from the second ``_``-separated token of the file
    name: ``left`` -> 0, ``right`` -> 1, ``up`` -> 2, ``straight`` -> 3.
    Files whose name carries no such token are skipped, so every returned
    entry has both an image and a label.

    :param path: root folder; all sub-folders are visited with ``os.walk``.
    :return: dict mapping the file name to ``[image, [label]]``.
    """
    direction_codes = {"left": 0, "right": 1, "up": 2, "straight": 3}

    returndict = {}
    for home, _dirs, files in os.walk(path):
        for filename in files:
            namelist = filename.split("_")
            # Work out the label first: a file without a known direction
            # cannot be used for training and is not opened at all.
            if len(namelist) < 2 or namelist[1] not in direction_codes:
                continue
            im = Image.open(os.path.join(home, filename))
            # Image and label are stored together.
            returndict[filename] = [im, [direction_codes[namelist[1]]]]
    return returndict


# Convert every image to a pixel list and collect the labels.
def picTwoXY(Imgs):
    """Split image entries into a feature list and a label list.

    :param Imgs: iterable of entries whose first item is an image and whose
        last item is the label.
    :return: tuple ``(features, labels)`` where ``features`` holds the result
        of ``imgval`` for every image and ``labels`` the matching last items.
    """
    # A single pass keeps one-shot iterables working.
    pairs = [(imgval(entry[0]), entry[-1]) for entry in Imgs]
    features = [pixels for pixels, _ in pairs]
    targets = [label for _, label in pairs]
    return features, targets


trainimgsrc = 'data/faces'  # training-set folder
testimgsrc = 'data/test'  # test-set folder

# Read both folders and turn them into arrays.
trainImgs = readimg(trainimgsrc)
testImgs = readimg(testimgsrc)
x_train, y_train = (np.array(part) for part in picTwoXY(trainImgs.values()))
x_test, y_test = (np.array(part) for part in picTwoXY(testImgs.values()))
print(type(x_train))
print(type(y_train))
print(x_train.shape)

layers = [960, 25, 4]
normalize_data = True
max_iter = 300
alpha = 0.1

multilayer_perceptron = MultilayerPerceptron(x_train, y_train, layers, normalize_data)
thetas, costs = multilayer_perceptron.train(max_iter, alpha)

# Cost per gradient-descent step.
plt.plot(range(len(costs)), costs)
plt.xlabel('梯度下降step')
plt.ylabel('cost')
plt.show()

y_train_predictions = multilayer_perceptron.predict(x_train)
y_test_predictions = multilayer_perceptron.predict(x_test)

# Accuracy in percent.
train_hits = np.sum(y_train_predictions == y_train)
test_hits = np.sum(y_test_predictions == y_test)
train_p = train_hits/y_train.shape[0] * 100
test_p = test_hits/y_test.shape[0] * 100
print("训练准确率:", train_p)
print("测试准确率:", test_p)
学习了ANN,手动实现正反向传播,但是准确率很差,浮动在70-80之间。手动实现的感觉就这水平了,没有pytorch框架运行的准确率高。
希望继续加油,2022快点过去吧。