Recurrent Neural Networks (RNN) and Their Variants LSTM and GRU
RNN (PyTorch version)
import torch
import torch.nn as nn

rnn = nn.RNN(5, 6, 1)
# input_size=5, hidden_size=6, num_layers=1
input = torch.randn(1, 3, 5)
# with the default batch_first=False this is (seq_len=1, batch=3, input_size=5)
h0 = torch.randn(1, 3, 6)
# (num_layers=1, batch=3, hidden_size=6)
output, hn = rnn(input, h0)
# outputs = rnn(inputs)
print(output)
print(hn)
print(output.shape)
print(hn.shape)
"""
tensor([[[-0.1758, 0.3392, 0.7851, 0.0583, 0.9421, -0.6396],
[ 0.7938, 0.3311, -0.6934, -0.9418, 0.9247, -0.3842],
[-0.4600, 0.7236, -0.5175, 0.2813, -0.5300, 0.3985]]],
grad_fn=<StackBackward0>)
tensor([[[-0.1758, 0.3392, 0.7851, 0.0583, 0.9421, -0.6396],
[ 0.7938, 0.3311, -0.6934, -0.9418, 0.9247, -0.3842],
[-0.4600, 0.7236, -0.5175, 0.2813, -0.5300, 0.3985]]],
grad_fn=<StackBackward0>)
torch.Size([1, 3, 6])
torch.Size([1, 3, 6])
"""
RNN (NumPy version)
import numpy as np

# Define the RNN class
class RNN:
    def __init__(self, input_size, hidden_size, num_layers):
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        # Initialize weights and biases
        self.W = np.random.randn(hidden_size, input_size)   # input-to-hidden
        self.U = np.random.randn(hidden_size, hidden_size)  # hidden-to-hidden
        self.b = np.zeros((hidden_size, 1))

    def forward(self, x, h0):
        # x: (batch, seq_len, input_size); h0: (num_layers, batch, hidden_size)
        batch_size, seq_length, _ = x.shape
        h = np.zeros((self.num_layers, batch_size, seq_length, self.hidden_size))
        output = np.zeros((batch_size, seq_length, self.hidden_size))
        for t in range(seq_length):
            # h_t = tanh(W x_t + U h_{t-1} + b); only layer 0 is computed
            ht_1 = h0[0, :, :].T if t == 0 else h[0, :, t - 1, :].T
            h[0, :, t, :] = np.tanh(np.dot(self.W, x[:, t, :].T) + np.dot(self.U, ht_1) + self.b).T
            output[:, t, :] = h[0, :, t, :]
        hn = h[:, :, -1, :]  # final time step, per layer: (num_layers, batch, hidden_size)
        return output, hn
# Parameters
input_size = 5
hidden_size = 6
num_layers = 1

# Create an RNN instance
rnn = RNN(input_size, hidden_size, num_layers)

# Input data: (batch=1, seq_len=3, input_size) and initial hidden state
input = np.random.randn(1, 3, input_size)
h0 = np.random.randn(num_layers, 1, hidden_size)

# Forward pass
output, hn = rnn.forward(input, h0)
print("Output:")
print(output)
print("Final Hidden State:")
print(hn)
print("Output shape:", output.shape)
print("Final Hidden State shape:", hn.shape)
"""
Output:
[[[ 0.9393916 -0.99748277 -0.92391202 0.99832362 -0.3663293
-0.99260743]
[-0.99289077 0.99703773 -0.9966218 0.69459619 -0.99964962
0.90935893]
[ 0.98077816 0.99997679 -0.99472697 -0.85161343 0.92207467
0.96636791]]]
Final Hidden State:
[[[ 0.98077816 0.99997679 -0.99472697 -0.85161343 0.92207467
0.96636791]]]
Output shape: (1, 3, 6)
Final Hidden State shape: (1, 1, 6)
"""
LSTM (PyTorch version)
import torch
import torch.nn as nn

lstm = nn.LSTM(5, 6, 2)
# input_size=5, hidden_size=6, num_layers=2
input = torch.randn(1, 3, 5)
# with the default batch_first=False this is (seq_len=1, batch=3, input_size=5)
h0 = torch.randn(2, 3, 6)  # initial hidden state: (num_layers=2, batch=3, hidden_size=6)
c0 = torch.randn(2, 3, 6)  # initial cell state:   (num_layers=2, batch=3, hidden_size=6)
output, (hn, cn) = lstm(input, (h0, c0))
# outputs = lstm(inputs)
print(output)
print(hn)
print(cn)
print(output.shape)
print(hn.shape)
print(cn.shape)
"""
tensor([[[ 0.2465, 0.4144, 0.2912, -0.0157, -0.0693, 0.1067],
[-0.0261, -0.5055, -0.4013, 0.4995, -0.5971, -0.0331],
[-0.2773, 0.0694, 0.1586, -0.1511, -0.0051, -0.4707]]],
grad_fn=<StackBackward0>)
tensor([[[-0.1957, 0.0412, -0.5626, 0.2152, 0.3757, 0.2379],
[-0.0030, 0.0779, 0.1966, 0.4887, 0.2604, 0.3688],
[ 0.0533, -0.1841, -0.0854, 0.3123, 0.2481, 0.3225]],
[[ 0.2465, 0.4144, 0.2912, -0.0157, -0.0693, 0.1067],
[-0.0261, -0.5055, -0.4013, 0.4995, -0.5971, -0.0331],
[-0.2773, 0.0694, 0.1586, -0.1511, -0.0051, -0.4707]]],
grad_fn=<StackBackward0>)
tensor([[[-0.5586, 0.1939, -1.4773, 0.4838, 0.5345, 0.4465],
[-0.0109, 0.1245, 0.2840, 0.6625, 1.6524, 0.5431],
[ 0.1326, -0.4249, -0.1355, 1.0727, 0.4292, 0.5769]],
[[ 0.4582, 0.8678, 0.6479, -0.0381, -0.1044, 0.2260],
[-0.0364, -0.7348, -0.7428, 1.0751, -1.3216, -0.0771],
[-0.4357, 0.1676, 0.5697, -0.5810, -0.0135, -0.7366]]],
grad_fn=<StackBackward0>)
torch.Size([1, 3, 6])
torch.Size([2, 3, 6])
torch.Size([2, 3, 6])
"""
LSTM (NumPy version)
import numpy as np

# Define the LSTM class
class LSTM:
    def __init__(self, input_size, hidden_size, num_layers):
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        # Initialize weights and biases for the four gates
        self.Wi = np.random.randn(hidden_size, input_size)   # input gate
        self.Ui = np.random.randn(hidden_size, hidden_size)
        self.bi = np.zeros((hidden_size, 1))
        self.Wf = np.random.randn(hidden_size, input_size)   # forget gate
        self.Uf = np.random.randn(hidden_size, hidden_size)
        self.bf = np.zeros((hidden_size, 1))
        self.Wc = np.random.randn(hidden_size, input_size)   # candidate cell state
        self.Uc = np.random.randn(hidden_size, hidden_size)
        self.bc = np.zeros((hidden_size, 1))
        self.Wo = np.random.randn(hidden_size, input_size)   # output gate
        self.Uo = np.random.randn(hidden_size, hidden_size)
        self.bo = np.zeros((hidden_size, 1))

    def sigmoid(self, x):
        return 1 / (1 + np.exp(-x))

    def tanh(self, x):
        return np.tanh(x)

    def forward(self, x, h0, c0):
        # x: (batch, seq_len, input_size); h0, c0: (num_layers, batch, hidden_size)
        # NOTE: only layer 0 is computed; with num_layers > 1 the states of the
        # upper layers stay zero (visible in the sample output below).
        batch_size, seq_length, _ = x.shape
        h = np.zeros((self.num_layers, batch_size, seq_length, self.hidden_size))
        c = np.zeros((self.num_layers, batch_size, seq_length, self.hidden_size))
        output = np.zeros((batch_size, seq_length, self.hidden_size))
        for t in range(seq_length):
            xt = x[:, t, :].T
            if t == 0:
                ht_1 = h0[0, :, :].T
                ct_1 = c0[0, :, :].T
            else:
                ht_1 = h[0, :, t - 1, :].T
                ct_1 = c[0, :, t - 1, :].T
            it = self.sigmoid(np.dot(self.Wi, xt) + np.dot(self.Ui, ht_1) + self.bi)
            ft = self.sigmoid(np.dot(self.Wf, xt) + np.dot(self.Uf, ht_1) + self.bf)
            ct_hat = self.tanh(np.dot(self.Wc, xt) + np.dot(self.Uc, ht_1) + self.bc)
            ot = self.sigmoid(np.dot(self.Wo, xt) + np.dot(self.Uo, ht_1) + self.bo)
            ct = ft * ct_1 + it * ct_hat   # new cell state
            ht = ot * self.tanh(ct)        # new hidden state
            h[0, :, t, :] = ht.T
            c[0, :, t, :] = ct.T
            output[:, t, :] = ht.T
        hn = h[:, :, -1, :]
        cn = c[:, :, -1, :]
        return output, (hn, cn)
# Parameters
input_size = 5
hidden_size = 6
num_layers = 2

# Create an LSTM instance
lstm = LSTM(input_size, hidden_size, num_layers)

# Input data and initial states
input = np.random.randn(1, 3, input_size)
h0 = np.random.randn(num_layers, 1, hidden_size)
c0 = np.random.randn(num_layers, 1, hidden_size)

# Forward pass
output, (hn, cn) = lstm.forward(input, h0, c0)
print("Output:")
print(output)
print("Final Hidden State:")
print(hn)
print("Final Cell State:")
print(cn)
print("Output shape:", output.shape)
print("Final Hidden State shape:", hn.shape)
print("Final Cell State shape:", cn.shape)
"""
Output:
[[[-8.75303021e-05 -4.64070677e-01 6.47451278e-03 4.45485014e-02
-9.77936077e-04 -2.28796212e-01]
[-2.54687875e-01 -1.07572876e-01 9.21763948e-02 2.30478587e-02
-2.40263090e-01 -1.47270855e-01]
[ 1.31632885e-01 -8.97511524e-02 3.44463341e-01 2.28150239e-02
-8.21965387e-02 -1.08865011e-01]]]
Final Hidden State:
[[[ 0.13163289 -0.08975115 0.34446334 0.02281502 -0.08219654
-0.10886501]]
[[ 0. 0. 0. 0. 0.
0. ]]]
Final Cell State:
[[[ 0.39569024 -0.14672883 0.64454692 0.09010789 -0.70117911
-0.17909673]]
[[ 0. 0. 0. 0. 0.
0. ]]]
Output shape: (1, 3, 6)
Final Hidden State shape: (2, 1, 6)
Final Cell State shape: (2, 1, 6)
"""
GRU (PyTorch version)
import torch
import torch.nn as nn

gru = nn.GRU(5, 6, 2)
# input_size=5, hidden_size=6, num_layers=2
input1 = torch.randn(1, 3, 5)
# with the default batch_first=False this is (seq_len=1, batch=3, input_size=5)
h0 = torch.randn(2, 3, 6)
# (num_layers=2, batch=3, hidden_size=6)
output, hn = gru(input1, h0)
# outputs = gru(inputs)
print(output)
print(hn)
print(output.shape)
print(hn.shape)
"""
tensor([[[ 0.9017, -0.8316, -0.7745, 0.3363, -0.4152, -0.2663],
[-0.1789, 0.3301, 0.5574, 0.1021, -0.1050, -0.8172],
[-0.3017, 0.4322, 0.1236, 0.5446, -0.2009, -0.3321]]],
grad_fn=<StackBackward0>)
tensor([[[-0.5085, 0.8401, -0.1227, -1.2665, 0.2676, -0.7743],
[-0.0389, -0.3141, -0.4438, 0.7459, -0.2061, 0.5883],
[ 0.1583, -0.3947, 0.2210, 0.8555, 0.6499, -0.5270]],
[[ 0.9017, -0.8316, -0.7745, 0.3363, -0.4152, -0.2663],
[-0.1789, 0.3301, 0.5574, 0.1021, -0.1050, -0.8172],
[-0.3017, 0.4322, 0.1236, 0.5446, -0.2009, -0.3321]]],
grad_fn=<StackBackward0>)
torch.Size([1, 3, 6])
torch.Size([2, 3, 6])
"""
GRU (NumPy version)
import numpy as np

# Define the GRU class
class GRU:
    def __init__(self, input_size, hidden_size, num_layers):
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        # Initialize weights and biases for the two gates and the candidate state
        self.Wz = np.random.randn(hidden_size, input_size)   # update gate
        self.Uz = np.random.randn(hidden_size, hidden_size)
        self.bz = np.zeros((hidden_size, 1))
        self.Wr = np.random.randn(hidden_size, input_size)   # reset gate
        self.Ur = np.random.randn(hidden_size, hidden_size)
        self.br = np.zeros((hidden_size, 1))
        self.Wh = np.random.randn(hidden_size, input_size)   # candidate state
        self.Uh = np.random.randn(hidden_size, hidden_size)
        self.bh = np.zeros((hidden_size, 1))

    def sigmoid(self, x):
        return 1 / (1 + np.exp(-x))

    def tanh(self, x):
        return np.tanh(x)

    def forward(self, x, h0):
        # x: (batch, seq_len, input_size); h0: (num_layers, batch, hidden_size)
        # NOTE: only layer 0 is computed; with num_layers > 1 the states of the
        # upper layers stay zero (visible in the sample output below).
        batch_size, seq_length, _ = x.shape
        h = np.zeros((self.num_layers, batch_size, seq_length, self.hidden_size))
        output = np.zeros((batch_size, seq_length, self.hidden_size))
        for t in range(seq_length):
            ht_1 = h0[0, :, :].T if t == 0 else h[0, :, t - 1, :].T
            xt = x[:, t, :].T
            zt = self.sigmoid(np.dot(self.Wz, xt) + np.dot(self.Uz, ht_1) + self.bz)
            rt = self.sigmoid(np.dot(self.Wr, xt) + np.dot(self.Ur, ht_1) + self.br)
            ht_hat = self.tanh(np.dot(self.Wh, xt) + np.dot(self.Uh, rt * ht_1) + self.bh)
            ht = (1 - zt) * ht_1 + zt * ht_hat   # interpolate old state and candidate
            h[0, :, t, :] = ht.T
            output[:, t, :] = ht.T
        hn = h[:, :, -1, :]
        return output, hn
# Parameters
input_size = 5
hidden_size = 6
num_layers = 2

# Create a GRU instance
gru = GRU(input_size, hidden_size, num_layers)

# Input data and initial hidden state
input1 = np.random.randn(1, 3, input_size)
h0 = np.random.randn(num_layers, 1, hidden_size)

# Forward pass
output, hn = gru.forward(input1, h0)
print("Output:")
print(output)
print("Final Hidden State:")
print(hn)
print("Output shape:", output.shape)
print("Final Hidden State shape:", hn.shape)
"""
Output:
[[[ 1.02777087 -0.9960525 0.1767113 0.2257122 -0.87206964
0.96244805]
[-0.92457884 -0.99684284 0.64983873 -0.50689303 -0.61156001
0.9624928 ]
[ 0.05462699 -0.99681203 -0.99645779 -0.11594933 0.54586281
0.83711046]]]
Final Hidden State:
[[[ 0.05462699 -0.99681203 -0.99645779 -0.11594933 0.54586281
0.83711046]]
[[ 0. 0. 0. 0. 0.
0. ]]]
Output shape: (1, 3, 6)
Final Hidden State shape: (2, 1, 6)
"""