Recurrent Neural Networks (RNN) and Their Variants LSTM and GRU
RNN (PyTorch version)
import torch
import torch.nn as nn

rnn = nn.RNN(5, 6, 1)
# input_size=5, hidden_size=6, num_layers=1
input = torch.randn(1, 3, 5)
# with the default batch_first=False this is (seq_len=1, batch=3, input_size=5)
h0 = torch.randn(1, 3, 6)
# (num_layers=1, batch=3, hidden_size=6)
output, hn = rnn(input, h0)
# outputs = rnn(inputs)
print(output)
print(hn)
print(output.shape)
print(hn.shape)
"""
tensor([[[-0.1758, 0.3392, 0.7851, 0.0583, 0.9421, -0.6396],
[ 0.7938, 0.3311, -0.6934, -0.9418, 0.9247, -0.3842],
[-0.4600, 0.7236, -0.5175, 0.2813, -0.5300, 0.3985]]],
grad_fn=<StackBackward0>)
tensor([[[-0.1758, 0.3392, 0.7851, 0.0583, 0.9421, -0.6396],
[ 0.7938, 0.3311, -0.6934, -0.9418, 0.9247, -0.3842],
[-0.4600, 0.7236, -0.5175, 0.2813, -0.5300, 0.3985]]],
grad_fn=<StackBackward0>)
torch.Size([1, 3, 6])
torch.Size([1, 3, 6])
"""
RNN (NumPy version)
import numpy as np

# Define the RNN class
class RNN:
    def __init__(self, input_size, hidden_size, num_layers):
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        # Initialize weights and biases
        self.W = np.random.randn(hidden_size, input_size)   # input-to-hidden
        self.U = np.random.randn(hidden_size, hidden_size)  # hidden-to-hidden
        self.b = np.zeros((hidden_size, 1))

    def forward(self, x, h0):
        # x: (batch, seq_len, input_size); h0: (num_layers, batch, hidden_size)
        batch_size, seq_length, _ = x.shape
        h = np.zeros((self.num_layers, batch_size, seq_length, self.hidden_size))
        output = np.zeros((batch_size, seq_length, self.hidden_size))
        for t in range(seq_length):
            # h_t = tanh(W x_t + U h_{t-1} + b); only layer 0 is computed
            ht_1 = h0[0, :, :].T if t == 0 else h[0, :, t - 1, :].T
            h[0, :, t, :] = np.tanh(np.dot(self.W, x[:, t, :].T) + np.dot(self.U, ht_1) + self.b).T
            output[:, t, :] = h[0, :, t, :]
        hn = h[:, :, -1, :]  # final time step, per layer: (num_layers, batch, hidden_size)
        return output, hn
# Parameters
input_size = 5
hidden_size = 6
num_layers = 1

# Create an RNN instance
rnn = RNN(input_size, hidden_size, num_layers)

# Input data: (batch=1, seq_len=3, input_size) and initial hidden state
input = np.random.randn(1, 3, input_size)
h0 = np.random.randn(num_layers, 1, hidden_size)

# Forward pass
output, hn = rnn.forward(input, h0)
print("Output:")
print(output)
print("Final Hidden State:")
print(hn)
print("Output shape:", output.shape)
print("Final Hidden State shape:", hn.shape)
"""
Output:
[[[ 0.9393916 -0.99748277 -0.92391202 0.99832362 -0.3663293
-0.99260743]
[-0.99289077 0.99703773 -0.9966218 0.69459619 -0.99964962
0.90935893]
[ 0.98077816 0.99997679 -0.99472697 -0.85161343 0.92207467
0.96636791]]]
Final Hidden State:
[[[ 0.98077816 0.99997679 -0.99472697 -0.85161343 0.92207467
0.96636791]]]
Output shape: (1, 3, 6)
Final Hidden State shape: (1, 1, 6)
"""
LSTM (PyTorch version)
import torch
import torch.nn as nn

lstm = nn.LSTM(5, 6, 2)
# input_size=5, hidden_size=6, num_layers=2
input = torch.randn(1, 3, 5)
# with the default batch_first=False this is (seq_len=1, batch=3, input_size=5)
h0 = torch.randn(2, 3, 6)  # initial hidden state: (num_layers=2, batch=3, hidden_size=6)
c0 = torch.randn(2, 3, 6)  # initial cell state:   (num_layers=2, batch=3, hidden_size=6)
output, (hn, cn) = lstm(input, (h0, c0))
# outputs = lstm(inputs)
print(output)
print(hn)
print(cn)
print(output.shape)
print(hn.shape)
print(cn.shape)
"""
tensor([[[ 0.2465, 0.4144, 0.2912, -0.0157, -0.0693, 0.1067],
[-0.0261, -0.5055, -0.4013, 0.4995, -0.5971, -0.0331],
[-0.2773, 0.0694, 0.1586, -0.1511, -0.0051, -0.4707]]],
grad_fn=<StackBackward0>)
tensor([[[-0.1957, 0.0412, -0.5626, 0.2152, 0.3757, 0.2379],
[-0.0030, 0.0779, 0.1966, 0.4887, 0.2604, 0.3688],
[ 0.0533, -0.1841, -0.0854, 0.3123, 0.2481, 0.3225]],
[[ 0.2465, 0.4144, 0.2912, -0.0157, -0.0693, 0.1067],
[-0.0261, -0.5055, -0.4013, 0.4995, -0.5971, -0.0331],
[-0.2773, 0.0694, 0.1586, -0.1511, -0.0051, -0.4707]]],
grad_fn=<StackBackward0>)
tensor([[[-0.5586, 0.1939, -1.4773, 0.4838, 0.5345, 0.4465],
[-0.0109, 0.1245, 0.2840, 0.6625, 1.6524, 0.5431],
[ 0.1326, -0.4249, -0.1355, 1.0727, 0.4292, 0.5769]],
[[ 0.4582, 0.8678, 0.6479, -0.0381, -0.1044, 0.2260],
[-0.0364, -0.7348, -0.7428, 1.0751, -1.3216, -0.0771],
[-0.4357, 0.1676, 0.5697, -0.5810, -0.0135, -0.7366]]],
grad_fn=<StackBackward0>)
torch.Size([1, 3, 6])
torch.Size([2, 3, 6])
torch.Size([2, 3, 6])
"""
LSTM (NumPy version)
import numpy as np

# Define the LSTM class
class LSTM:
    def __init__(self, input_size, hidden_size, num_layers):
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        # Initialize weights and biases for the four gates
        self.Wi = np.random.randn(hidden_size, input_size)   # input gate
        self.Ui = np.random.randn(hidden_size, hidden_size)
        self.bi = np.zeros((hidden_size, 1))
        self.Wf = np.random.randn(hidden_size, input_size)   # forget gate
        self.Uf = np.random.randn(hidden_size, hidden_size)
        self.bf = np.zeros((hidden_size, 1))
        self.Wc = np.random.randn(hidden_size, input_size)   # candidate cell state
        self.Uc = np.random.randn(hidden_size, hidden_size)
        self.bc = np.zeros((hidden_size, 1))
        self.Wo = np.random.randn(hidden_size, input_size)   # output gate
        self.Uo = np.random.randn(hidden_size, hidden_size)
        self.bo = np.zeros((hidden_size, 1))

    def sigmoid(self, x):
        return 1 / (1 + np.exp(-x))

    def tanh(self, x):
        return np.tanh(x)

    def forward(self, x, h0, c0):
        # x: (batch, seq_len, input_size); h0, c0: (num_layers, batch, hidden_size)
        # NOTE: only layer 0 is computed; with num_layers > 1 the states of the
        # upper layers stay zero (visible in the sample output below).
        batch_size, seq_length, _ = x.shape
        h = np.zeros((self.num_layers, batch_size, seq_length, self.hidden_size))
        c = np.zeros((self.num_layers, batch_size, seq_length, self.hidden_size))
        output = np.zeros((batch_size, seq_length, self.hidden_size))
        for t in range(seq_length):
            xt = x[:, t, :].T
            if t == 0:
                ht_1 = h0[0, :, :].T
                ct_1 = c0[0, :, :].T
            else:
                ht_1 = h[0, :, t - 1, :].T
                ct_1 = c[0, :, t - 1, :].T
            it = self.sigmoid(np.dot(self.Wi, xt) + np.dot(self.Ui, ht_1) + self.bi)
            ft = self.sigmoid(np.dot(self.Wf, xt) + np.dot(self.Uf, ht_1) + self.bf)
            ct_hat = self.tanh(np.dot(self.Wc, xt) + np.dot(self.Uc, ht_1) + self.bc)
            ot = self.sigmoid(np.dot(self.Wo, xt) + np.dot(self.Uo, ht_1) + self.bo)
            ct = ft * ct_1 + it * ct_hat   # new cell state
            ht = ot * self.tanh(ct)        # new hidden state
            h[0, :, t, :] = ht.T
            c[0, :, t, :] = ct.T
            output[:, t, :] = ht.T
        hn = h[:, :, -1, :]
        cn = c[:, :, -1, :]
        return output, (hn, cn)
# Parameters
input_size = 5
hidden_size = 6
num_layers = 2

# Create an LSTM instance
lstm = LSTM(input_size, hidden_size, num_layers)

# Input data and initial states
input = np.random.randn(1, 3, input_size)
h0 = np.random.randn(num_layers, 1, hidden_size)
c0 = np.random.randn(num_layers, 1, hidden_size)

# Forward pass
output, (hn, cn) = lstm.forward(input, h0, c0)
print("Output:")
print(output)
print("Final Hidden State:")
print(hn)
print("Final Cell State:")
print(cn)
print("Output shape:", output.shape)
print("Final Hidden State shape:", hn.shape)
print("Final Cell State shape:", cn.shape)
"""
Output:
[[[-8.75303021e-05 -4.64070677e-01 6.47451278e-03 4.45485014e-02
-9.77936077e-04 -2.28796212e-01]
[-2.54687875e-01 -1.07572876e-01 9.21763948e-02 2.30478587e-02
-2.40263090e-01 -1.47270855e-01]
[ 1.31632885e-01 -8.97511524e-02 3.44463341e-01 2.28150239e-02
-8.21965387e-02 -1.08865011e-01]]]
Final Hidden State:
[[[ 0.13163289 -0.08975115 0.34446334 0.02281502 -0.08219654
-0.10886501]]
[[ 0. 0. 0. 0. 0.
0. ]]]
Final Cell State:
[[[ 0.39569024 -0.14672883 0.64454692 0.09010789 -0.70117911
-0.17909673]]
[[ 0. 0. 0. 0. 0.
0. ]]]
Output shape: (1, 3, 6)
Final Hidden State shape: (2, 1, 6)
Final Cell State shape: (2, 1, 6)
"""
GRU (PyTorch version)
import torch
import torch.nn as nn

gru = nn.GRU(5, 6, 2)
# input_size=5, hidden_size=6, num_layers=2
input1 = torch.randn(1, 3, 5)
# with the default batch_first=False this is (seq_len=1, batch=3, input_size=5)
h0 = torch.randn(2, 3, 6)
# (num_layers=2, batch=3, hidden_size=6)
output, hn = gru(input1, h0)
# outputs = gru(inputs)
print(output)
print(hn)
print(output.shape)
print(hn.shape)
"""
tensor([[[ 0.9017, -0.8316, -0.7745, 0.3363, -0.4152, -0.2663],
[-0.1789, 0.3301, 0.5574, 0.1021, -0.1050, -0.8172],
[-0.3017, 0.4322, 0.1236, 0.5446, -0.2009, -0.3321]]],
grad_fn=<StackBackward0>)
tensor([[[-0.5085, 0.8401, -0.1227, -1.2665, 0.2676, -0.7743],
[-0.0389, -0.3141, -0.4438, 0.7459, -0.2061, 0.5883],
[ 0.1583, -0.3947, 0.2210, 0.8555, 0.6499, -0.5270]],
[[ 0.9017, -0.8316, -0.7745, 0.3363, -0.4152, -0.2663],
[-0.1789, 0.3301, 0.5574, 0.1021, -0.1050, -0.8172],
[-0.3017, 0.4322, 0.1236, 0.5446, -0.2009, -0.3321]]],
grad_fn=<StackBackward0>)
torch.Size([1, 3, 6])
torch.Size([2, 3, 6])
"""
GRU (NumPy version)
import numpy as np

# Define the GRU class
class GRU:
    def __init__(self, input_size, hidden_size, num_layers):
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        # Initialize weights and biases for the two gates and the candidate state
        self.Wz = np.random.randn(hidden_size, input_size)   # update gate
        self.Uz = np.random.randn(hidden_size, hidden_size)
        self.bz = np.zeros((hidden_size, 1))
        self.Wr = np.random.randn(hidden_size, input_size)   # reset gate
        self.Ur = np.random.randn(hidden_size, hidden_size)
        self.br = np.zeros((hidden_size, 1))
        self.Wh = np.random.randn(hidden_size, input_size)   # candidate state
        self.Uh = np.random.randn(hidden_size, hidden_size)
        self.bh = np.zeros((hidden_size, 1))

    def sigmoid(self, x):
        return 1 / (1 + np.exp(-x))

    def tanh(self, x):
        return np.tanh(x)

    def forward(self, x, h0):
        # x: (batch, seq_len, input_size); h0: (num_layers, batch, hidden_size)
        # NOTE: only layer 0 is computed; with num_layers > 1 the states of the
        # upper layers stay zero (visible in the sample output below).
        batch_size, seq_length, _ = x.shape
        h = np.zeros((self.num_layers, batch_size, seq_length, self.hidden_size))
        output = np.zeros((batch_size, seq_length, self.hidden_size))
        for t in range(seq_length):
            ht_1 = h0[0, :, :].T if t == 0 else h[0, :, t - 1, :].T
            xt = x[:, t, :].T
            zt = self.sigmoid(np.dot(self.Wz, xt) + np.dot(self.Uz, ht_1) + self.bz)
            rt = self.sigmoid(np.dot(self.Wr, xt) + np.dot(self.Ur, ht_1) + self.br)
            ht_hat = self.tanh(np.dot(self.Wh, xt) + np.dot(self.Uh, rt * ht_1) + self.bh)
            ht = (1 - zt) * ht_1 + zt * ht_hat   # interpolate old state and candidate
            h[0, :, t, :] = ht.T
            output[:, t, :] = ht.T
        hn = h[:, :, -1, :]
        return output, hn
# Parameters
input_size = 5
hidden_size = 6
num_layers = 2

# Create a GRU instance
gru = GRU(input_size, hidden_size, num_layers)

# Input data and initial hidden state
input1 = np.random.randn(1, 3, input_size)
h0 = np.random.randn(num_layers, 1, hidden_size)

# Forward pass
output, hn = gru.forward(input1, h0)
print("Output:")
print(output)
print("Final Hidden State:")
print(hn)
print("Output shape:", output.shape)
print("Final Hidden State shape:", hn.shape)
"""
Output:
[[[ 1.02777087 -0.9960525 0.1767113 0.2257122 -0.87206964
0.96244805]
[-0.92457884 -0.99684284 0.64983873 -0.50689303 -0.61156001
0.9624928 ]
[ 0.05462699 -0.99681203 -0.99645779 -0.11594933 0.54586281
0.83711046]]]
Final Hidden State:
[[[ 0.05462699 -0.99681203 -0.99645779 -0.11594933 0.54586281
0.83711046]]
[[ 0. 0. 0. 0. 0.
0. ]]]
Output shape: (1, 3, 6)
Final Hidden State shape: (2, 1, 6)
"""