Machine Learning 8: Linear Regression Code
1. Ridge Regression
1.1 Manual Implementation of Ridge Regression
- Description
The manual implementation follows, step by step, the derivation of the ridge regression formulas from the previous post on linear regression.
- Mean squared error: MSE
- Loss function
- Derivative: least squares (all three are written out below)
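For reference, these are the quantities the code below implements (full derivation in the previous post); the 0.5 factors match the loss_omega function and the gradient-descent update:
$$\mathrm{MSE}=\frac{1}{n}\sum_{i=1}^{n}\bigl(f(x_i)-y_i\bigr)^2$$
$$L(\omega)=\frac{1}{2}\,\mathrm{MSE}+\frac{\lambda}{2}\sum_{j=1}^{k}\omega_j^2$$
$$\nabla_{\omega}L=\frac{1}{n}X^T(X\omega-y)+\lambda\tilde{\omega}$$
where $\tilde{\omega}$ is $\omega$ with the bias entry $\omega_0$ zeroed out, since the bias is not regularized.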
- Code
import numpy as np
import matplotlib.pyplot as plt
from sklearn.preprocessing import PolynomialFeatures
from sklearn.metrics import mean_squared_error
# data
data = np.array([[-2.95507616, 10.94533252],
[-0.44226119, 2.96705822],
[-2.13294087, 6.57336839],
[1.84990823, 5.44244467],
[0.35139795, 2.83533936],
[-1.77443098, 5.6800407],
[-1.8657203, 6.34470814],
[1.61526823, 4.77833358],
[-2.38043687, 8.51887713],
[-1.40513866, 4.18262786]])
n = data.shape[0] # number of samples
x_matrix = data[:, 0].reshape(-1, 1) # reshape the array into an (n, 1) column
y_matrix = data[:, 1].reshape(-1, 1)
# Cost function
def loss_omega(omega_matrix_param, x_matrix_param, y_matrix_param, lambda_param):
    """
    omega_matrix_param: (k+1, 1)
    x_matrix_param: (n, k+1)
    y_matrix_param: (n, 1)
    """
    f = np.dot(x_matrix_param, omega_matrix_param)  # np.dot is matrix multiplication; f has shape (n, 1)
    omega_without_w0 = omega_matrix_param[1:]  # the bias term w0 is not regularized
    L_omega = 0.5 * mean_squared_error(f, y_matrix_param) + 0.5 * lambda_param * np.sum(np.square(omega_without_w0))
    return L_omega
# Gradient descent
def GD(omega_matrix_param, x_matrix_param, y_matrix_param, lambda_param, alpha_param):
    for i in range(T):
        h = np.dot(x_matrix_param, omega_matrix_param)
        omega_w0 = np.r_[np.zeros([1, 1]), omega_matrix_param[1:]]  # copy of omega with w0 set to 0
        # gradient step: the learning rate scales the whole gradient, including the penalty term
        omega_matrix_param -= alpha_param * (1/n * np.dot(x_matrix_param.T, h - y_matrix_param) + lambda_param * omega_w0)
        if i % 50000 == 0:
            print(loss_omega(omega_matrix_param=omega_matrix_param, x_matrix_param=x_matrix_param,
                             y_matrix_param=y_matrix_param, lambda_param=lambda_param))
    return omega_matrix_param
# Initial training parameters
T = 1200000  # number of iterations
degree = 11
omega = np.ones((degree + 1, 1))  # degree + 1 = 12 parameters in total, including the bias
alpha = 0.0000000006  # learning rate (tiny because the polynomial features are unscaled)
lamb = 0.0001
# Training: ridge regression
demo1 = PolynomialFeatures(degree=degree, include_bias=False)
x_matrix_nihe = demo1.fit_transform(x_matrix)
x_matrix_x0 = np.c_[np.ones((n, 1)), x_matrix_nihe]  # stack column-wise: prepend a column of ones as the bias feature x0
omega = GD(lambda_param=lamb, x_matrix_param=x_matrix_x0, omega_matrix_param=omega, y_matrix_param=y_matrix, alpha_param=alpha)
# Plotting
"""Plot the ridge regression curve"""
x_plot = np.linspace(-2.99, 1.9, 1000).reshape(-1, 1)  # x values covering the plotting range
demo2 = PolynomialFeatures(degree=degree, include_bias=True)  # include_bias=True adds the constant column, giving 12 features to match omega
x_matrix_wait = demo2.fit_transform(x_plot)  # fit, then transform the plot points into polynomial features
y_plot_predict = np.dot(x_matrix_wait, omega)
plt.plot(x_plot, y_plot_predict, 'r-')
"""Plot the points of the data set"""
plt.plot(x_matrix, y_matrix, 'b.')
"""Label the x and y axes"""
plt.xlabel('x')
plt.ylabel('y')
"""Show the plot"""
plt.show()
- Output
(figure) The fitted ridge regression curve for lamb = 0.0001.
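The learning rate has to be as tiny as 6e-10 because the unscaled degree-11 polynomial features differ in magnitude by many orders. A minimal sketch (assuming the variables and the GD function defined above; the learning rate 0.01 is illustrative, not tuned) of how standardizing the features permits a far larger step size:

from sklearn.preprocessing import StandardScaler

scaler = StandardScaler()
x_scaled = scaler.fit_transform(x_matrix_nihe)  # zero mean, unit variance per feature column
x_scaled_x0 = np.c_[np.ones((n, 1)), x_scaled]  # prepend the bias column, as above
omega_scaled = np.ones((degree + 1, 1))
omega_scaled = GD(omega_matrix_param=omega_scaled, x_matrix_param=x_scaled_x0,
                  y_matrix_param=y_matrix, lambda_param=lamb, alpha_param=0.01)

Note that omega_scaled then applies to the standardized features, so any plotting code would need to run scaler.transform on the polynomial features first.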
1.2 Ridge Regression via the Normal Equation
- Description
$$W=(X^TX+\lambda I)^{-1}X^TY$$
where
- $X$: $n \times k$ matrix
- $Y$: $n \times 1$ matrix
- $W$: $k \times 1$ matrix
- $I$: $k \times k$ identity matrix
- $n$ is the number of samples, $k$ the number of features
- matrix multiplication requires: columns of the left factor = rows of the right factor
- Code
import numpy as np
import matplotlib.pyplot as plt
from sklearn.preprocessing import PolynomialFeatures
from sklearn.metrics import mean_squared_error
# data
data = np.array([[-2.95507616, 10.94533252],
[-0.44226119, 2.96705822],
[-2.13294087, 6.57336839],
[1.84990823, 5.44244467],
[0.35139795, 2.83533936],
[-1.77443098, 5.6800407],
[-1.8657203, 6.34470814],
[1.61526823, 4.77833358],
[-2.38043687, 8.51887713],
[-1.40513866, 4.18262786]])
n = data.shape[0] # number of samples
x_matrix = data[:, 0].reshape(-1, 1) # reshape the array into an (n, 1) column
y_matrix = data[:, 1].reshape(-1, 1)
# Cost function
def loss_omega(omega_matrix_param, x_matrix_param, y_matrix_param, lambda_param):
    """
    omega_matrix_param: (k+1, 1)
    x_matrix_param: (n, k+1)
    y_matrix_param: (n, 1)
    """
    f = np.dot(x_matrix_param, omega_matrix_param)  # np.dot is matrix multiplication; f has shape (n, 1)
    omega_without_w0 = omega_matrix_param[1:]  # the bias term w0 is not regularized
    L_omega = 0.5 * mean_squared_error(f, y_matrix_param) + 0.5 * lambda_param * np.sum(np.square(omega_without_w0))
    return L_omega
# Initial training parameters
degree = 11
omega = np.ones((degree + 1, 1))  # degree + 1 = 12 parameters in total, including the bias
lamb = 10
# Training: ridge regression via the normal equation
demo1 = PolynomialFeatures(degree=degree, include_bias=False)
x_matrix_nihe = demo1.fit_transform(x_matrix)
x_matrix_x0 = np.c_[np.ones((n, 1)), x_matrix_nihe]  # stack column-wise: prepend a column of ones as the bias feature x0
# note: the identity matrix here also penalizes the bias w0, unlike loss_omega, which excludes it
temp1_matrix = np.linalg.inv(np.dot(x_matrix_x0.T, x_matrix_x0) + lamb*np.identity(x_matrix_x0.shape[1]))  # (X^T X + lambda*I)^(-1)
temp2_matrix = np.dot(temp1_matrix, x_matrix_x0.T)  # ... X^T
temp3_matrix = np.dot(temp2_matrix, y_matrix)  # ... Y
omega = temp3_matrix  # W = (X^T X + lambda*I)^(-1) X^T Y
# Print omega and the loss
print(omega)
print(loss_omega(omega_matrix_param=omega, x_matrix_param=x_matrix_x0,
                 y_matrix_param=y_matrix, lambda_param=lamb))
# Plotting
"""Plot the ridge regression curve"""
x_plot = np.linspace(-3, 2, 1000).reshape(-1, 1)  # x values covering the plotting range
demo = PolynomialFeatures(degree=degree, include_bias=True)  # include_bias=True adds the constant column, giving 12 features to match omega
x_matrix_wait = demo.fit_transform(x_plot)  # fit, then transform the plot points into polynomial features
y_plot_predict = np.dot(x_matrix_wait, omega)
plt.plot(x_plot, y_plot_predict, 'r-')
"""Plot the points of the data set"""
plt.plot(x_matrix, y_matrix, 'b.')
"""Label the x and y axes"""
plt.xlabel('x')
plt.ylabel('y')
"""Show the plot"""
plt.show()
- Output
(figure) The fitted curve for lamb = 10; different values of lamb give different degrees of fit.
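Instead of forming the explicit inverse with np.linalg.inv, the same solution can be obtained with np.linalg.solve, which is generally faster and more numerically stable on the ill-conditioned matrices that high-degree polynomial features produce. A minimal sketch using the variables defined above:

A = np.dot(x_matrix_x0.T, x_matrix_x0) + lamb * np.identity(x_matrix_x0.shape[1])
b = np.dot(x_matrix_x0.T, y_matrix)
omega_solve = np.linalg.solve(A, b)  # solves A @ omega = b without inverting A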
1.3 Ridge Regression with scikit-learn
- Description
Scikit-learn (formerly scikits.learn, also known as sklearn):
- is a free software machine learning library for the Python programming language;
- offers a variety of classification, regression, and clustering algorithms, including support vector machines, random forests, gradient boosting, k-means, and DBSCAN;
- is designed to interoperate with the Python numerical and scientific libraries NumPy and SciPy.
- Code
import numpy as np
import matplotlib.pyplot as plt
from sklearn.metrics import mean_squared_error
from sklearn.preprocessing import PolynomialFeatures
from sklearn.linear_model import Ridge
# data
data = np.array([[-2.95507616, 10.94533252],
[-0.44226119, 2.96705822],
[-2.13294087, 6.57336839],
[1.84990823, 5.44244467],
[0.35139795, 2.83533936],
[-1.77443098, 5.6800407],
[-1.8657203, 6.34470814],
[1.61526823, 4.77833358],
[-2.38043687, 8.51887713],
[-1.40513866, 4.18262786]])
n = data.shape[0] # number of samples
x_matrix = data[:, 0].reshape(-1, 1) # reshape the array into an (n, 1) column
y_matrix = data[:, 1].reshape(-1, 1)
# Cost function
def loss_omega(intercept, omega_matrix_param, x_matrix_param, y_matrix_param, lambda_param):
    """
    intercept: the bias term, fitted separately by sklearn
    omega_matrix_param: (k, 1) -- ridge.coef_.T, without the bias
    x_matrix_param: (n, k)
    y_matrix_param: (n, 1)
    """
    f = np.dot(x_matrix_param, omega_matrix_param) + intercept  # f has shape (n, 1)
    # the bias is already separate here, so the penalty covers every entry of omega
    L_omega = 0.5 * mean_squared_error(f, y_matrix_param) + 0.5 * lambda_param * np.sum(np.square(omega_matrix_param))
    return L_omega
# Initial training parameters
lamb = 10
degree = 11
demo = PolynomialFeatures(degree=degree, include_bias=False)
x_matrix_nihe = demo.fit_transform(x_matrix)
# Fit with scikit-learn
ridge = Ridge(alpha=lamb, solver="cholesky")  # "cholesky" solves the normal equation in closed form, as in section 1.2
ridge.fit(x_matrix_nihe, y_matrix)
omega = ridge.coef_.T
# Print the intercept, the coefficients, and the loss
print(ridge.intercept_, ridge.coef_)
print(loss_omega(intercept=ridge.intercept_,
                 omega_matrix_param=omega,
                 x_matrix_param=x_matrix_nihe,
                 y_matrix_param=y_matrix,
                 lambda_param=lamb))
# Plotting
"""Plot the ridge regression curve"""
x_plot = np.linspace(-3, 2, 1000).reshape(-1, 1)  # x values covering the plotting range
x_plot_poly = demo.fit_transform(x_plot)  # transform the plot points into polynomial features (include_bias=False, matching the fit)
y_plot_predict = np.dot(x_plot_poly, ridge.coef_.T) + ridge.intercept_
plt.plot(x_plot, y_plot_predict, 'r-')
"""Plot the points of the data set"""
plt.plot(x_matrix, y_matrix, 'b.')
"""Label the x and y axes"""
plt.xlabel('x')
plt.ylabel('y')
"""Show the plot"""
plt.show()
- Output
(figure) The fitted curve for lamb = 10.
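Since different values of lamb give different fits, scikit-learn can also select the regularization strength by cross-validation. A minimal sketch (the alpha grid is illustrative) using RidgeCV on the same polynomial features:

from sklearn.linear_model import RidgeCV

ridge_cv = RidgeCV(alphas=[0.01, 0.1, 1.0, 10.0, 100.0])
ridge_cv.fit(x_matrix_nihe, y_matrix)
print(ridge_cv.alpha_)  # the regularization strength chosen by cross-validation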