ex3-neural network

AndrewNg 机器学习习题ex3-neural network

ex3data1.mat是一个matlab文件，储存了5000个图像的数据，每个图像是一个20像素×20像素的灰度图，展开后为一个400维的向量，每一个向量都储存为矩阵X的行，所以X的维度是（5000，400）
y的每一行代表X所对应的手写数字，y的维度是（5000，1）

需要的头：

import matplotlib.pyplot as plt
import numpy as np
import scipy.io as sio
import matplotlib
import scipy.optimize as opt
from sklearn.metrics import classification_report # 这个包是评价报告

Visualizing the data

载入数据：

def load_data(path, transpose=True):
    data = sio.loadmat(path)
    y = data.get('y')
    y = y.reshape(y.shape[0])
    X = data.get('X')
    if transpose:
        X = np.array([im.reshape((20, 20)).T for im in X])
        X = np.array([im.reshape(400) for im in X])
    return X, y

X, y = load_data('./data/ex3data1.mat')

画一个图

def plot_an_image(image):
    fig, ax = plt.subplots(figsize=(1, 1))
    ax.matshow(image.reshape((20, 20)), cmap=matplotlib.cm.binary)
    plt.xticks(np.array([]))
    plt.yticks(np.array([]))
    plt.show()

pick_one = np.random.randint(0, 5000)
plot_an_image(X[pick_one, :])
print('this should be {}'.format(y[pick_one]))

画一百个图

def plot_100_image(X):
    size = int(np.sqrt(X.shape[1]))
    sample_idx = np.random.choice(np.array(X.shape[0]), 100)
    sample_images = X[sample_idx, :]

    fig, ax_array = plt.subplots(nrows=10, ncols=10, sharey=True, sharex=True, figsize=(8, 8))

    for r in range(10):
        for c in range(10):
            ax_array[r, c].matshow(sample_images[10 * r + c].reshape((size, size)), cmap=matplotlib.cm.binary)
            plt.xticks(np.array([]))
            plt.yticks(np.array([]))
    plt.show()
    
plot_100_image(X)

准备数据

加载好ex3data1.mat文件后我们需要处理一下，首先X是一个(5000,400)的矩阵，我们在第一列加上一列全为1的矩阵为偏差量，y是一个(5000,)的矩阵，需要注意的是，为了兼容Oxtave和matlab，y中0的被标记为了10。我们把y分成10类整理y数据为(10,5000)的一个矩阵。

扩展 5000*1 到 5000*10
     比如 y=10 -> [0, 0, 0, 0, 0, 0, 0, 0, 0, 1]: ndarray
     比如 y=1 -> [0, 1, 0, 0, 0, 0, 0, 0, 0, 0]: ndarray

raw_X, raw_y = load_data('./data/ex3data1.mat')
X = np.insert(raw_X, 0, values=np.ones(raw_X.shape[0]), axis = 1) # 插入了第一列 全为1
y_matrix = []
for k in range(1, 11):
    y_matrix.append((raw_y == k).astype(int))
y_matrix = [y_matrix[-1]] + y_matrix[:-1]
y = np.array(y_matrix)

训练一维模型

处理好数据后接着写，激活函数和代价函数，代价函数的偏导数就是梯度函数，我们期望这个函数最小。给梯度函数和代价函数加入正则项。

def sigmoid(z):
    return 1 / (1 + np.exp(-z))

def cost(theta, X, y):
    return np.mean(-y * np.log(sigmoid(X @ theta)) - (1 - y) * np.log(1 - sigmoid(X @ theta)))

# 梯度就是jθ的在θ偏导
def gradient(theta, X, y):
    # @ 对应元素相乘求和
    return (1 / len(X)) * X.T @ (sigmoid(X @ theta) - y)

def regularized_cost(theta, X, y, l=1):
    theta_j1_to_n = theta[1:]
    regularized_term = (1 / (2 * len(X))) * np.power(theta_j1_to_n, 2).sum()

    return cost(theta, X, y) + regularized_term

def regularized_gradient(theta, X, y, l=1):
    theta_j1_to_n = theta[1:]
    regularized_theta = (l / len(X)) * theta_j1_to_n
    regularized_term = np.concatenate([np.array([0]), regularized_theta]) # 在theta矩阵前接一个[0]
    return gradient(theta, X, y) + regularized_term

运用minimize()函数开始迭代，计算出theta，然后验证theta的准确性。

def logistic_regression(X, y, l=1):
    theta = np.zeros(X.shape[1])
    res = opt.minimize(fun=regularized_cost, x0=theta, args=(X, y, l), method='TNC', jac=regularized_gradient, options={'disp': True})
    final_theta = res.x
    return final_theta

def predict(x, theta):
    prob = sigmoid(x @ theta)
    return (prob >= 0.5).astype(int)

t0 = logistic_regression(X, y[0])
y_pred = predict(X, t0)
print('Accuracy={}'.format(np.mean(y[0] == y_pred)))

最终求得结果为 Accuracy=0.9974

训练K维模型

k_theta = np.array([logistic_regression(X, y[k]) for k in range(10)])
print(k_theta.shape) # (10, 401)

prob_matrix = sigmoid(X @ k_theta.T)
np.set_printoptions(suppress=True) # 科学计数法表示
print(prob_matrix.shape) # (5000, 10)

y_pred = np.argmax(prob_matrix, axis=1)
print(y_pred.shape) # (5000,)

y_answer = raw_y.copy()
y_answer[y_answer==10] = 0

print(classification_report(y_answer, y_pred))

             precision    recall  f1-score   support

          0       0.97      0.99      0.98       500
          1       0.95      0.99      0.97       500
          2       0.95      0.92      0.93       500
          3       0.95      0.91      0.93       500
          4       0.95      0.95      0.95       500
          5       0.92      0.92      0.92       500
          6       0.97      0.98      0.97       500
          7       0.95      0.95      0.95       500
          8       0.93      0.92      0.92       500
          9       0.92      0.92      0.92       500

avg / total       0.94      0.94      0.94      5000

如ex3.pdf中所说，我们成功的分类出94%的例子。

Feedforward Propagation and Prediction

我们的神经网路如上图所示，它有3层构成（一个输入层，一个隐藏层a，一个输出层。）已经提供了一组训练参数（Θ1，Θ2）储存在ex3weights.mat中

% Load saved matrices from file
load('ex3weights.mat');
% The matrices Theta1 and Theta2 will now be in your Octave
% environment
% Theta1 has size 25 x 401
% Theta2 has size 10 x 26

def load_weight(path):
    data = sio.loadmat(path)
    return data['Theta1'], data['Theta2']

theta1, theta2 = load_weight('./data/ex3weights.mat')

X, y = load_data('./data/ex3data1.mat',transpose=False)

X = np.insert(X, 0, values=np.ones(X.shape[0]), axis=1)  # intercept

a1 = X

z2 = a1 @ theta1.T # (5000, 401) @ (25,401).T = (5000, 25)
print(z2.shape)

z2 = np.insert(z2, 0, values=np.ones(z2.shape[0]), axis=1)

a2 = sigmoid(z2)

z3 = a2 @ theta2.T

a3 = sigmoid(z3)

y_pred = np.argmax(a3, axis=1) + 1  # numpy is 0 base index, +1 for matlab convention，返回沿轴axis最大值的索引，axis=1代表行

print(classification_report(y, y_pred))

             precision    recall  f1-score   support

          1       0.97      0.98      0.97       500
          2       0.98      0.97      0.97       500
          3       0.98      0.96      0.97       500
          4       0.97      0.97      0.97       500
          5       0.98      0.98      0.98       500
          6       0.97      0.99      0.98       500
          7       0.98      0.97      0.97       500
          8       0.98      0.98      0.98       500
          9       0.97      0.96      0.96       500
         10       0.98      0.99      0.99       500

avg / total       0.98      0.98      0.98      5000