1. Ridge Regression

1.1 Manual Implementation of Ridge Regression

  • Description
This manual implementation follows the ridge regression derivation from the previous post on linear regression, building it up step by step (the loss and gradient are restated below):
- mean squared error: MSE
- the regularized loss function
- the gradient, via least squares
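For reference, restating the result of that derivation in the notation the code uses (\omega_0 is the bias and is excluded from the penalty):

L(\omega) = \frac{1}{2n}\sum_{i=1}^{n}\left(x_i^T\omega - y_i\right)^2 + \frac{\lambda}{2}\sum_{j=1}^{k}\omega_j^2

\nabla_\omega L = \frac{1}{n}X^T(X\omega - y) + \lambda\tilde{\omega}, \qquad \tilde{\omega} = (0, \omega_1, \dots, \omega_k)^T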
  • Code
import numpy as np
import matplotlib.pyplot as plt
from sklearn.preprocessing import PolynomialFeatures
from sklearn.metrics import mean_squared_error

# data
data = np.array([[-2.95507616,  10.94533252],
                 [-0.44226119,   2.96705822],
                 [-2.13294087,   6.57336839],
                 [1.84990823,   5.44244467],
                 [0.35139795,   2.83533936],
                 [-1.77443098,   5.6800407],
                 [-1.8657203,   6.34470814],
                 [1.61526823,   4.77833358],
                 [-2.38043687,   8.51887713],
                 [-1.40513866,   4.18262786]])
n = data.shape[0]  # number of samples
x_matrix = data[:, 0].reshape(-1, 1)  # reshape the 1-D array into an (n, 1) column
y_matrix = data[:, 1].reshape(-1, 1)

# cost function
def loss_omega(omega_matrix_param, x_matrix_param, y_matrix_param, lambda_param):
    """
    omega_matrix_param: (k+1, 1)
    x_matrix_param: (n, k+1)
    y_matrix_param: (n, 1)
    """
    f = np.dot(x_matrix_param, omega_matrix_param)  # np.dot is matrix multiplication; f has shape (n, 1)
    omega_without_w0 = omega_matrix_param[1:]  # the bias w0 is excluded from the penalty
    L_omega = 0.5 * mean_squared_error(f, y_matrix_param) + 0.5 * lambda_param * np.sum(np.square(omega_without_w0))
    return L_omega

# gradient descent
def GD(omega_matrix_param, x_matrix_param, y_matrix_param, lambda_param, alpha_param):
    for i in range(T):  # T (the number of iterations) is read from the global scope
        h = np.dot(x_matrix_param, omega_matrix_param)
        omega_w0 = np.r_[np.zeros([1, 1]), omega_matrix_param[1:]]  # copy of omega with the bias entry zeroed, so w0 is not regularized
        # gradient step: the learning rate must scale the whole gradient, including the penalty term
        omega_matrix_param -= alpha_param * (1/n * np.dot(x_matrix_param.T, h - y_matrix_param) + lambda_param * omega_w0)
        if i % 50000 == 0:
            print(loss_omega(omega_matrix_param=omega_matrix_param, x_matrix_param=x_matrix_param,
                             y_matrix_param=y_matrix_param, lambda_param=lambda_param))
    return omega_matrix_param

# initial training parameters
T = 1200000  # number of iterations
degree = 11
omega = np.ones((degree + 1, 1))  # 12 parameters in total: the bias plus 11 polynomial coefficients
alpha = 0.0000000006  # learning rate (tiny, because the unscaled degree-11 features make the gradients very large)
lamb = 0.0001

# training: ridge regression
demo1 = PolynomialFeatures(degree=degree, include_bias=False)
x_matrix_nihe = demo1.fit_transform(x_matrix)
x_matrix_x0 = np.c_[np.ones((n, 1)), x_matrix_nihe]  # np.c_ stacks matrices column-wise: prepend a column of ones at position 0 (the bias column)
omega = GD(lambda_param=lamb, x_matrix_param=x_matrix_x0, omega_matrix_param=omega, y_matrix_param=y_matrix, alpha_param=alpha)

# plotting
"""plot the ridge regression curve"""
x_plot = np.linspace(-2.99, 1.9, 1000).reshape(-1, 1)  # a dense range of x values
demo2 = PolynomialFeatures(degree=degree, include_bias=True)  # include_bias=True adds the ones column, so the 12 features match omega
x_matrix_wait = demo2.fit_transform(x_plot)  # transform x_plot into the polynomial feature matrix
y_plot_predict = np.dot(x_matrix_wait, omega)
plt.plot(x_plot, y_plot_predict, 'r-')

"""画data数据集的点"""
plt.plot(x_matrix, y_matrix, 'b.')

"""标志x轴和y轴"""
plt.xlabel('x')
plt.ylabel('y')

"""画"""
plt.show()
  • Output
    lamb = 0.0001
    (figure: the fitted ridge curve in red over the blue data points)
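Before trusting 1.2 million update steps, it is worth confirming that the analytic gradient inside GD really is the derivative of loss_omega. A minimal finite-difference check (the helper numerical_grad and the test point omega_test are mine, not part of the original code):

# numerical gradient check for the hand-derived ridge gradient
def numerical_grad(loss_fn, omega, eps=1e-4):
    """Central-difference approximation of d(loss)/d(omega).
    The loss is quadratic in omega, so central differences are exact up to rounding."""
    grad = np.zeros_like(omega)
    for j in range(omega.shape[0]):
        e = np.zeros_like(omega)
        e[j, 0] = eps
        grad[j, 0] = (loss_fn(omega + e) - loss_fn(omega - e)) / (2 * eps)
    return grad

omega_test = np.random.randn(degree + 1, 1)
loss_fn = lambda w: loss_omega(omega_matrix_param=w, x_matrix_param=x_matrix_x0,
                               y_matrix_param=y_matrix, lambda_param=lamb)
h = np.dot(x_matrix_x0, omega_test)
omega_w0 = np.r_[np.zeros([1, 1]), omega_test[1:]]
analytic = 1/n * np.dot(x_matrix_x0.T, h - y_matrix) + lamb * omega_w0
numeric = numerical_grad(loss_fn, omega_test)
# the degree-11 features are huge, so compare relative rather than absolute error
print(np.max(np.abs(analytic - numeric) / (np.abs(numeric) + 1e-12)))  # should be tiny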

1.2 Ridge Regression via the Normal Equation

  • Description

W = (X^T X + \lambda I)^{-1} X^T Y

where:
- X: an n × k matrix
- Y: an n × 1 matrix
- W: a k × 1 matrix
- I: the k × k identity matrix
- n is the number of samples and k is the number of features
- matrix multiplication requires the column count of the left factor to equal the row count of the right factor
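The code below forms this inverse explicitly with np.linalg.inv, which is fine at this scale; a numerically preferable variant solves the linear system (X^T X + \lambda I)W = X^T Y directly. A minimal sketch, using the variable names defined in the code below:

# equivalent to the inv-based computation, but without forming the inverse
A = np.dot(x_matrix_x0.T, x_matrix_x0) + lamb * np.identity(x_matrix_x0.shape[1])
b = np.dot(x_matrix_x0.T, y_matrix)
omega_solve = np.linalg.solve(A, b)  # same W, better conditioned than inv(A) @ b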

  • 代码
import numpy as np
import matplotlib.pyplot as plt
from sklearn.preprocessing import PolynomialFeatures
from sklearn.metrics import mean_squared_error

# data
data = np.array([[-2.95507616,  10.94533252],
                 [-0.44226119,   2.96705822],
                 [-2.13294087,   6.57336839],
                 [1.84990823,   5.44244467],
                 [0.35139795,   2.83533936],
                 [-1.77443098,   5.6800407],
                 [-1.8657203,   6.34470814],
                 [1.61526823,   4.77833358],
                 [-2.38043687,   8.51887713],
                 [-1.40513866,   4.18262786]])
n = data.shape[0]  # number of samples
x_matrix = data[:, 0].reshape(-1, 1)  # reshape the 1-D array into an (n, 1) column
y_matrix = data[:, 1].reshape(-1, 1)

# cost function
def loss_omega(omega_matrix_param, x_matrix_param, y_matrix_param, lambda_param):
    """
    omega_matrix_param: (k+1, 1)
    x_matrix_param: (n, k+1)
    y_matrix_param: (n, 1)
    """
    f = np.dot(x_matrix_param, omega_matrix_param)  # np.dot is matrix multiplication; f has shape (n, 1)
    omega_without_w0 = omega_matrix_param[1:]  # the bias w0 is excluded from the penalty
    L_omega = 0.5 * mean_squared_error(f, y_matrix_param) + 0.5 * lambda_param * np.sum(np.square(omega_without_w0))
    return L_omega

# initial training parameters
degree = 11
omega = np.ones((degree + 1, 1))  # 12 parameters in total: bias plus 11 polynomial coefficients (overwritten by the closed form below)
lamb = 10

# training: ridge regression via the normal equation
demo1 = PolynomialFeatures(degree=degree, include_bias=False)
x_matrix_nihe = demo1.fit_transform(x_matrix)
x_matrix_x0 = np.c_[np.ones((n, 1)), x_matrix_nihe]  # np.c_ stacks matrices column-wise: prepend a column of ones at position 0 (the bias column)
temp1_matrix = np.linalg.inv(np.dot(x_matrix_x0.T, x_matrix_x0) + lamb*np.identity(x_matrix_x0.shape[1]))  # (X^T X + lambda*I)^{-1}; note the full identity also penalizes the bias w0 here
temp2_matrix = np.dot(temp1_matrix, x_matrix_x0.T)
temp3_matrix = np.dot(temp2_matrix, y_matrix)
omega = temp3_matrix

# print omega and the loss
print(omega)
print(loss_omega(omega_matrix_param=omega, x_matrix_param=x_matrix_x0,
                 y_matrix_param=y_matrix, lambda_param=lamb))

# plotting
"""plot the ridge regression curve"""
x_plot = np.linspace(-3, 2, 1000).reshape(-1, 1)  # a dense range of x values
demo = PolynomialFeatures(degree=degree, include_bias=True)  # include_bias=True adds the ones column, so the 12 features match omega
x_matrix_wait = demo.fit_transform(x_plot)  # transform x_plot into the polynomial feature matrix
y_plot_predict = np.dot(x_matrix_wait, omega)
plt.plot(x_plot, y_plot_predict, 'r-')

"""画data数据集的点"""
plt.plot(x_matrix, y_matrix, 'b.')

"""标志x轴和y轴"""
plt.xlabel('x')
plt.ylabel('y')

"""画"""
plt.show()

  • Output

lamb = 10. Different values of lamb give different degrees of fit; see the sweep sketch below.
(figure: the fitted ridge curve in red over the blue data points)
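To make "different lamb, different fit" concrete, one can sweep several values of lamb through the closed form above and compare the unregularized data-fit term. A minimal sketch reusing x_matrix_x0 and y_matrix from the code above (the helper ridge_normal_eq is mine):

# sweep lambda and report the data-fit term 0.5*MSE for each value
def ridge_normal_eq(X, y, lamb_val):
    """Closed-form ridge solution W = (X^T X + lamb_val*I)^{-1} X^T y."""
    A = np.dot(X.T, X) + lamb_val * np.identity(X.shape[1])
    return np.dot(np.linalg.inv(A), np.dot(X.T, y))

# lamb = 0 is omitted: with 12 features and only 10 samples, X^T X is singular
for lamb_try in [0.01, 0.1, 1, 10, 100]:
    w = ridge_normal_eq(x_matrix_x0, y_matrix, lamb_try)
    fit = 0.5 * mean_squared_error(np.dot(x_matrix_x0, w), y_matrix)
    print(lamb_try, fit)  # larger lamb -> smoother curve, larger data-fit error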

1.3 Ridge Regression with scikit-learn

  • Description
Scikit-learn (formerly scikits.learn, also known as sklearn):
- is a free machine learning library for the Python programming language [1]
- offers a variety of classification, regression, and clustering algorithms, including support vector machines, random forests, gradient boosting, k-means, and DBSCAN,
- and is designed to interoperate with the Python numerical and scientific libraries NumPy and SciPy.
  • Code
import numpy as np
import matplotlib.pyplot as plt
from sklearn.metrics import mean_squared_error
from sklearn.preprocessing import PolynomialFeatures
from sklearn.linear_model import Ridge

# data
data = np.array([[-2.95507616,  10.94533252],
                 [-0.44226119,   2.96705822],
                 [-2.13294087,   6.57336839],
                 [1.84990823,   5.44244467],
                 [0.35139795,   2.83533936],
                 [-1.77443098,   5.6800407],
                 [-1.8657203,   6.34470814],
                 [1.61526823,   4.77833358],
                 [-2.38043687,   8.51887713],
                 [-1.40513866,   4.18262786]])
n = data.shape[0]  # number of samples
x_matrix = data[:, 0].reshape(-1, 1)  # reshape the 1-D array into an (n, 1) column
y_matrix = data[:, 1].reshape(-1, 1)

# cost function
def loss_omega(intercept, omega_matrix_param, x_matrix_param, y_matrix_param, lambda_param):
    """
    omega_matrix_param: (k, 1) -- polynomial coefficients only; the intercept is passed separately
    x_matrix_param: (n, k)
    y_matrix_param: (n, 1)
    """
    f = np.dot(x_matrix_param, omega_matrix_param) + intercept  # np.dot is matrix multiplication; f has shape (n, 1)
    # coef_ does not contain the bias (sklearn keeps the intercept separate),
    # so the penalty covers all entries of omega here
    omega_without_w0 = omega_matrix_param
    L_omega = 0.5 * mean_squared_error(f, y_matrix_param) + 0.5 * lambda_param * np.sum(np.square(omega_without_w0))
    return L_omega

# initial training parameters
lamb = 10
degree = 11
demo = PolynomialFeatures(degree=degree, include_bias=False)  # 11 polynomial features; sklearn fits the intercept itself
x_matrix_nihe = demo.fit_transform(x_matrix)

# using sklearn
ridge = Ridge(alpha=lamb, solver="cholesky")  # alpha is sklearn's name for the regularization strength lambda
ridge.fit(x_matrix_nihe, y_matrix)
omega = ridge.coef_.T  # coef_ has shape (1, 11); transpose it into a column vector

# print the intercept, the coefficients, and the loss
print(ridge.intercept_, ridge.coef_)
print(loss_omega(intercept=ridge.intercept_, omega_matrix_param=omega,
                 x_matrix_param=x_matrix_nihe, y_matrix_param=y_matrix,
                 lambda_param=lamb))

# plotting
"""plot the ridge regression curve"""
x_plot = np.linspace(-3, 2, 1000).reshape(-1, 1)  # a dense range of x values
x_plot_ploy = demo.fit_transform(x_plot)  # transform x_plot into the polynomial feature matrix (include_bias=False, matching coef_)
y_plot_predict = np.dot(x_plot_ploy, ridge.coef_.T) + ridge.intercept_
plt.plot(x_plot, y_plot_predict, 'r-')

"""画data数据集的点"""
plt.plot(x_matrix, y_matrix, 'b.')

"""标志x轴和y轴"""
plt.xlabel('x')
plt.ylabel('y')

"""画"""
plt.show()

  • Output

lamb = 10
(figure: the fitted ridge curve in red over the blue data points)
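As a cross-check against section 1.2: sklearn does not penalize the intercept, while the normal-equation code in 1.2 penalizes w0 through the full identity matrix, so the two results differ slightly. Zeroing the bias entry of I reproduces sklearn's convention; a minimal sketch (I_mod and w_full are my names, and with these ill-conditioned degree-11 features expect agreement only to a few digits):

# normal equation with an unpenalized bias, mirroring sklearn's Ridge
X_aug = np.c_[np.ones((n, 1)), x_matrix_nihe]  # prepend the bias column
I_mod = np.identity(X_aug.shape[1])
I_mod[0, 0] = 0  # do not penalize the bias term
w_full = np.linalg.solve(np.dot(X_aug.T, X_aug) + lamb * I_mod,
                         np.dot(X_aug.T, y_matrix))
print(w_full[0], ridge.intercept_)       # bias vs sklearn's intercept
print(np.c_[w_full[1:], ridge.coef_.T])  # coefficients side by side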
