1 线性回归模型

1.1 单变量线性回归模型

简单实现

数据读取

数据集为ex1data.txt，数据形式如下：

6.1101,17.592
5.5277,9.1302
8.5186,13.662
7.0032,11.854
5.8598,6.8233
8.3829,11.886
7.4764,4.3483
    ...
8.2934,0.14454
13.394,9.0551
5.4369,0.61705

使用read_csv()读取文本中的数据：

1
2
3

# 读取数据
path = 'ex1data1.txt'
data = pd.read_csv(path, header=None, names=['Population', 'Profit']

数据提取

读取数据总行数为：97，data目前可以看做一个(97,2)的矩阵，第一列为Population值，第二列为Profit值。

使用insert()函数为population’列前添加值均为1的一列，方便矩阵运算，将前两列作为训练值X，后一列作为目标Y

data.insert(0, 'Ones', 1)

# set X (training data) and y (target variable)
# data.shape 得到一个一维数组数组，data.shape[0]、data.shape[1]分别存储列数、行数
cols = data.shape[1]

# data.iloc[row_index,col_index[，选择所有数据，数据范围是[row_index,col_index]
X = data.iloc[:, 0:cols - 1]  # X是所有行，去掉最后一列
y = data.iloc[:, cols - 1:cols]  # y是所有行，最后一列

得到向量：

X = np.matrix(X.values)
y = np.matrix(y.values)
# theta 存储参数，一个(1,2)的矩阵，初始值设定为0，即：w = 0, b = 0
theta = np.matrix(np.array([0, 0]))

代价函数

# 代价函数
# X 输入数据，是一个(97,2)的矩阵，第一列为ones均为1，第二列为population为输入值
# y 输出数据，是一个(97,1)的矩阵
# theta 存储参数，一个(1,2)的矩阵，初始值设定为0，即：w = 0, b = 0
def computeCost(X, y, theta):
    # np.power，对于矩阵每一个项执行次方运算
    # X * theta.T ：theta.T为参数矩阵的转至，得到 y = wx + b，得到的是f(x)的(97,1)矩阵
    inner = np.power(((X * theta.T) - y), 2)
    # np.sun 矩阵求和
    return np.sum(inner) / (2 * len(X))

梯度下降算法

# 梯度下降算法
# alpha 学习率
# iters 学习次数
def gradientDescent(X, y, theta, alpha, iters):
    # 创建一个全0的（1,2）矩阵，临时存储theta改变后的值
    temp = np.matrix(np.zeros(theta.shape))
    # 参数个数，即2，theta.ravel()向量展平为一维，会改变原数据，而flatten不会改变原数据
    parameters = int(theta.ravel().shape[1])
    # 记录每一次学习的代价
    cost = np.zeros(iters)

    # 遍历每一次学习次数
    for i in range(iters):
        # X * theta.T ：theta.T为参数矩阵的转至，得到 y = wx + b，得到的是f(x)的(97,1)矩阵
        error = (X * theta.T) - y

        # 修改每一个参数j = 0时，修改 w，w = a/m * np.sum(np.multiply(error, X[:, j])
        # 修改 b = a/m * np.sum(error);
        for j in range(parameters):
            # X[:, j] 是一个(97, 1)矩阵，如果j = 0, 则X[:, j] 矩阵全为 1 ，此时term = error，计算w
            # 如果j = 1，X[:, j]为Population那一列，此时term = error * x，计算b
            term = np.multiply(error, X[:, j])
            temp[0, j] = theta[0, j] - ((alpha / len(X)) * np.sum(term))

        # 修改完之后theta赋值
        theta = temp
        # 计算代价并保存（用于画图）
        cost[i] = computeCost(X, y, theta)

    return theta, cost

函数调用

alpha = 0.01
iters = 10000

g, cost = gradientDescent(X, y, theta, alpha, iters)

绘图

# 创建等差数组，Population最小值到最大值，数组长度为100
x = np.linspace(data.Population.min(), data.Population.max(), 100)
f = g[0, 0] + (g[0, 1] * x)

fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(24, 8))

# 画线
ax1.plot(x, f, 'r', label='Prediction')
# 画点
ax1.scatter(data.Population, data.Profit, label='Traning Data')
ax1.legend(loc=2)
ax1.set_xlabel('Population')
ax1.set_ylabel('Profit')
ax1.set_title('Predicted Profit vs. Population Size')

ax2.plot(np.arange(iters), cost, linewidth=2, color='red')
ax2.set_xlabel('Iterations')
ax2.set_ylabel('Cost')
ax2.set_title('Error vs. Training Epoch')
plt.show()

最终结果

完整代码

import numpy as np
import matplotlib.pyplot as plt
import pandas as pd


# 代价函数
# X 输入数据，是一个(97,2)的矩阵，第一列为ones均为1，第二列为population为输入值
# y 输出数据，是一个(97,1)的矩阵
# theta 存储参数，一个(1,2)的矩阵，初始值设定为0，即：w = 0, b = 0
def computeCost(X, y, theta):
    # np.power 求次方
    # X * theta.T ：theta.T为参数矩阵的转至，得到 y = wx + b，得到的是f(x)的(97,1)矩阵
    inner = np.power(((X * theta.T) - y), 2)
    # np.sun 矩阵求和
    return np.sum(inner) / (2 * len(X))


# 梯度下降算法
# alpha 学习率
# iters 学习次数
def gradientDescent(X, y, theta, alpha, iters):
    # 创建一个全0的（1,2）矩阵，临时存储theta改变后的值
    temp = np.matrix(np.zeros(theta.shape))
    # 参数个数，即2，theta.ravel()向量展平为一维，会改变原数据，而flatten不会改变原数据
    parameters = int(theta.ravel().shape[1])
    # 记录每一次学习的代价
    cost = np.zeros(iters)

    # 遍历每一次学习次数
    for i in range(iters):
        # X * theta.T ：theta.T为参数矩阵的转至，得到 y = wx + b，得到的是f(x)的(97,1)矩阵
        error = (X * theta.T) - y

        # 修改每一个参数j = 0时，修改 w，w = a/m * np.sum(np.multiply(error, X[:, j])
        # 修改 b = a/m * np.sum(error);
        for j in range(parameters):
            # X[:, j] 是一个(97, 1)矩阵，如果j = 0, 则X[:, j] 矩阵全为 1 ，此时term = error，计算w
            # 如果j = 1，X[:, j]为Population那一列，此时term = error * x，计算b
            term = np.multiply(error, X[:, j])
            temp[0, j] = theta[0, j] - ((alpha / len(X)) * np.sum(term))

        # 修改完之后theta赋值
        theta = temp
        cost[i] = computeCost(X, y, theta)

    return theta, cost


# 读取数据
path = 'ex1data1.txt'
data = pd.read_csv(path, header=None, names=['Population', 'Profit'])

# data.plot(kind='scatter', x='Population', y='Profit', figsize=(12,8))
# plt.show()

data.insert(0, 'Ones', 1)

# set X (training data) and y (target variable)
# data.shape 得到一个一维数组数组，data.shape[0]、data.shape[1]分别存储列数、行数
cols = data.shape[1]

# data.iloc[row_index,col_index[，选择所有数据，数据范围是[row_index,col_index]
X = data.iloc[:, 0:cols - 1]  # X是所有行，去掉最后一列
y = data.iloc[:, cols - 1:cols]  # y是所有行，最后一列

# print(X.head())
# print("---------------")
# print(y.head())

X = np.matrix(X.values)
y = np.matrix(y.values)
# theta 存储参数，一个(1,2)的矩阵，初始值设定为0，即：w = 0, b = 0
theta = np.matrix(np.array([0, 0]))

alpha = 0.01
iters = 1000

g, cost = gradientDescent(X, y, theta, alpha, iters)

# 创建等差数组，Population最小值到最大值，数组长度为100
x = np.linspace(data.Population.min(), data.Population.max(), 100)
f = g[0, 0] + (g[0, 1] * x)

fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(24, 8))

# 画线
ax1.plot(x, f, 'r', label='Prediction')
# 画点
ax1.scatter(data.Population, data.Profit, label='Traning Data')
ax1.legend(loc=2)
ax1.set_xlabel('Population')
ax1.set_ylabel('Profit')
ax1.set_title('Predicted Profit vs. Population Size')

ax2.plot(np.arange(iters), cost, linewidth=2, color='red')
ax2.set_xlabel('Iterations')
ax2.set_ylabel('Cost')
ax2.set_title('Error vs. Training Epoch')
plt.show()

TensorFlow框架实现

import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from sklearn.model_selection import train_test_split

# 读取数据
path = 'ex1data1.txt'
data = pd.read_csv(path, header=None, names=['Population', 'Profit'])

# 提取特征和目标值
X = data['Population'].values
y = data['Profit'].values

# 数据集划分
# 使用train_test_split函数将数据集划分为训练集和测试集：X是特征数据，y是目标值数据，
# test_size=0.2表示将数据划分为80%的训练集和20%的测试集，random_state=42用于设置随机种子，保证每次划分结果的一致性。
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# 创建模型
# 在这个例子中，我们使用一个具有1个神经元的全连接层，输入形状为(1,)，激活函数为线性激活函数。
model = tf.keras.Sequential([
    tf.keras.layers.Dense(1, input_shape=(1,), activation='linear')
])

# 编译模型
# optimizer = ‘sgd’ 选择随机梯度下降（Stochastic Gradient Descent，SGD）作为优化器
model.compile(optimizer='sgd', loss='mean_squared_error')

# 训练模型
history = model.fit(X_train, y_train, epochs=100, validation_data=(X_test, y_test), verbose=1)

# 获取模型参数
w, b = model.get_weights()

# 创建一个包含两个子图的画布
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(12, 6))

# 绘制训练集损失曲线
ax1.set_xlabel('Epochs')
ax1.set_ylabel('Loss')
ax1.plot(history.history['loss'], label='Training Loss')
ax1.set_title('Training Loss')
# 绘制测试集损失曲线
ax1.plot(history.history['val_loss'], label='Test Loss')
ax1.set_xlabel('Epochs')
ax1.set_ylabel('Loss')
ax1.set_title('Loss')
ax1.legend()
# 绘制分布图和模型函数
ax2.scatter(X, y, label='Training Data')
ax2.plot(X, w[0] * X + b[0], 'r', label='Model')
ax2.set_xlabel('Population')
ax2.set_ylabel('Profit')
ax2.set_title('Population vs. Profit')
ax2.legend()

# 调整子图间的间距
plt.tight_layout()

# 显示图形
plt.show()

1.2 多变量线性回归模型

简单实现

与单变量线性回归模型类似，这里不多解释

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt


# 代价函数
# X 输入数据，是一个(97,3)的矩阵，第一列为ones均为1，第二列为population为输入值
# y 输出数据，是一个(97,1)的矩阵
# theta 存储参数，一个(1,2)的矩阵，初始值设定为0，即：w = 0, b = 0
def computeCost(X, y, theta):
    # np.power 求次方
    # X * theta.T ：theta.T为参数矩阵的转至，得到 y = wx + b，得到的是f(x)的(97,1)矩阵
    inner = np.power(((X * theta.T) - y), 2)
    # np.sun 矩阵求和
    return np.sum(inner) / (2 * len(X))


# 梯度下降算法
# alpha 学习率
# iters 学习次数
def gradientDescent(X, y, theta, alpha, iters):
    # 创建一个全0的（1,3）矩阵，临时存储theta改变后的值
    temp = np.matrix(np.zeros(theta.shape))
    # 参数个数，即3，theta.ravel()向量展平为一维，会改变原数据，而flatten不会改变原数据
    parameters = int(theta.ravel().shape[1])
    # 记录每一次学习的代价
    cost = np.zeros(iters)

    # 遍历每一次学习次数
    for i in range(iters):
        # X * theta.T ：theta.T为参数矩阵的转至，得到 y = wx + b，得到的是f(x)的(97,1)矩阵
        error = (X * theta.T) - y

        # 修改每一个参数j = 0时，修改 w，w = a/m * np.sum(np.multiply(error, X[:, j])
        # 修改 b = a/m * np.sum(error);
        for j in range(parameters):
            # X[:, j] 是一个(97, 1)矩阵，如果j = 0, 则X[:, j] 矩阵全为 1 ，此时term = error，计算w
            # 如果j = 1，X[:, j]为Population那一列，此时term = error * x，计算b
            term = np.multiply(error, X[:, j])
            temp[0, j] = theta[0, j] - ((alpha / len(X)) * np.sum(term))

        # 修改完之后theta赋值
        theta = temp
        cost[i] = computeCost(X, y, theta)

    return theta, cost


path = "ex1data2.txt"
data = pd.read_csv(path, header=None, names=['Size', 'Bedrooms', 'Price'])
# print(data.head())

# 特征缩放
data = (data - data.mean()) / data.std()

# add ones column
data.insert(0, 'Ones', 1)

# set X (training data) and y (target variable)
cols = data.shape[1]
X = data.iloc[:, 0:cols - 1]
y = data.iloc[:, cols - 1:cols]
theta = np.matrix(np.array([0, 0, 0]))
X = np.matrix(X.values)
y = np.matrix(y.values)

alpha = 0.01
iters = 1000
g, cost = gradientDescent(X, y, theta, alpha, iters)

# 创建图表和子图
fig = plt.figure(figsize=(12, 8))
ax = fig.add_subplot(111, projection='3d')

# 绘制数据点
ax.scatter(data['Size'], data['Bedrooms'], data['Price'], c='r', marker='o')

# 绘制模型平面
size_vals = np.linspace(data['Size'].min(), data['Size'].max(), 100)
bedroom_vals = np.linspace(data['Bedrooms'].min(), data['Bedrooms'].max(), 100)
size_vals, bedroom_vals = np.meshgrid(size_vals, bedroom_vals)
price_vals = g[0, 2] + g[0, 0] * size_vals + g[0, 1] * bedroom_vals
ax.plot_surface(size_vals, bedroom_vals, price_vals, color='blue', alpha=0.5)

# 设置坐标轴标签
ax.set_xlabel('Size')
ax.set_ylabel('Bedrooms')
ax.set_zlabel('Price')

# 设置图表标题
ax.set_title('Price vs. Size and Bedrooms')

# 显示图表
plt.show()

TensorFlow实现

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import tensorflow as tf

# 读取数据
path = "ex1data2.txt"
data = pd.read_csv(path, header=None, names=['Size', 'Bedrooms', 'Price'])
data = (data - data.mean()) / data.std()

# 特征和标签
X = data[['Size', 'Bedrooms']].values
y = data['Price'].values

# 创建模型
model = tf.keras.Sequential([
    tf.keras.layers.Dense(1, input_shape=(2,))
])

# 编译模型
# loss = 'mse' 表示使用均方差代价函数
model.compile(optimizer=tf.keras.optimizers.SGD(learning_rate=0.01), loss='mse')

# 训练模型
history = model.fit(X, y, epochs=1000, verbose=0)

# 获取模型的权重参数
w, b = model.get_weights()

# 创建图表和子图
fig = plt.figure(figsize=(12, 8))
ax = fig.add_subplot(111, projection='3d')

# 绘制数据点
ax.scatter(data['Size'], data['Bedrooms'], data['Price'], c='r', marker='o')

# 绘制模型平面
size_vals = np.linspace(data['Size'].min(), data['Size'].max(), 100)
bedroom_vals = np.linspace(data['Bedrooms'].min(), data['Bedrooms'].max(), 100)
size_vals, bedroom_vals = np.meshgrid(size_vals, bedroom_vals)
price_vals = w[0] * size_vals + w[1] * bedroom_vals + b
ax.plot_surface(size_vals, bedroom_vals, price_vals, color='blue', alpha=0.5)

# 设置坐标轴标签
ax.set_xlabel('Size')
ax.set_ylabel('Bedrooms')
ax.set_zlabel('Price')

# 设置图表标题
ax.set_title('Price vs. Size and Bedrooms')

# 显示图表
plt.show()

2 逻辑回归模型

6.9 实训：Softmax函数处理手写数字图像识别

对于此练习，我们将使用逻辑回归来识别手写数字（0到9）。我们将扩展我们在练习2中写的逻辑回归的实现，并将其应用于一对一的分类。让我们开始加载数据集。它是在MATLAB的本机格式，所以要加载它在Python，我们需要使用一个SciPy工具。

第一步：参数读取与处理

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from scipy.io import loadmat
import cv2 as cv2

# 导入数据集
data = loadmat('ex3data1.mat')

X = data['X']
y = data['y'].flatten()
# 将 y 的标签映射为 0-9 的下标
y = y % 10
# 数字类别数量
num_labels = 10

# 添加偏置项
X = np.insert(X, 0, values=1, axis=1)

# 初始化参数
n = X.shape[1]
initial_theta = np.zeros((num_labels, n))

# alpha
alpha = 0.1
# 学习率
learning_rate = 1

# 迭代次数
num_iters = 10000

第二步：定义Softmax函数

1
2
3

def softmax(z):
    e_z = np.exp(z)
    return e_z / np.sum(e_z, axis=1, keepdims=True)

第三步：定义代价函数

def costFunction(theta, X, y, learningRate):
    m = len(y)
    h = softmax(X @ theta.T)
    regularization = (learningRate / (2 * m)) * np.sum(np.power(th	eta[:, 1:], 2))
    cost = (-1 / m) * np.sum(np.log(h[np.arange(m), y])) + regularization
    gradient = (1 / m) * ((X.T) @ (h - np.eye(len(h[1]))[y]))
    gradient = gradient.T
    gradient[:, 1:] = gradient[:, 1:] + (learningRate / m) * theta[:, 1:]
    return cost, gradient

下面假定样本总数为5000，样本分类数为10：

h = softmax(X @ theta.T) ：获得概率矩阵

cost = (-1 / m) * np.sum(np.log(h[np.arange(m), y])) + regularization : 得到代价函数，其中h[np.arrange(m),y]意思是对于 h 中所有的行，只选中 y 中值对应的那一列，得到一个(5000,1)矩阵每行都是对于样本的成本函数。

gradient = (1 / m) * ((X.T) @ (h - np.eye(len(h[1]))[y])) ：得到梯度

X 矩阵：

h - np.eye(len(h[1]))[y]) 是生成一个len(h[1])*len(h[1]) 的单位矩阵，np.eye(len(h[1]))[y]) 从第一行开始按照 y 矩阵中的值作为行索引填充这个单位矩阵的值，h-np.eye(len(h[1]))[y])实际上是修改$a_i$的值使得$a_i$部分的值能够减一。

第四步：梯度下降

def gradientDescent(X, y, theta,alpha,  learning_rate, num_iters):
    m = len(y)
    J_history = []
    for i in range(num_iters):
        cost, gradient = costFunction(theta, X, y, learning_rate)
        theta = theta - alpha * gradient
        J_history.append(cost)
    return theta, J_history

第五步：训练

# 使用梯度下降算法进行多类别分类
theta, J_history = gradientDescent(X, y, initial_theta, alpha,  learning_rate, num_iters)

# 预测
probabilities = softmax(X @ theta.T)
predictions = np.argmax(probabilities, axis=1) + 1

# 计算准确率
accuracy = np.mean(predictions == y) * 100
print(f"准确率：{accuracy}%")