使用 Python 编写一个基于 RNN 的文本分类程序
【代码】使用 Python 编写一个基于 RNN(LSTM)的文本分类程序。
·
import numpy as np
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, LSTM, Dense
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
# Toy corpus: four short review-like sentences.
texts = [
    "This is a good movie",
    "This is a bad movie",
    "The plot was exciting",
    "The plot was boring",
]
# One binary label per sentence, in the same order as `texts`
# (1 for the "good"/"exciting" sentences, 0 for the "bad"/"boring" ones).
labels = np.array([1, 0, 1, 0])
# Hold out the last sample for testing; the first three are used for training.
train_texts, test_texts = texts[:3], texts[3:]
train_labels, test_labels = labels[:3], labels[3:]
# Build the vocabulary from the training texts only, then convert both the
# training and the test texts into sequences of integer word indices.
# An explicit out-of-vocabulary token is reserved so that words not seen
# while fitting (e.g. words that occur only in the test texts) are mapped to
# a dedicated index instead of being silently dropped from the sequence.
tokenizer = Tokenizer(oov_token="<OOV>")
tokenizer.fit_on_texts(train_texts)
train_sequences = tokenizer.texts_to_sequences(train_texts)
test_sequences = tokenizer.texts_to_sequences(test_texts)
# Pad (or truncate) every sequence to the length of the longest training
# sequence so that all inputs share the same shape.
max_sequence_length = max(len(sequence) for sequence in train_sequences)
train_data = pad_sequences(train_sequences, maxlen=max_sequence_length)
test_data = pad_sequences(test_sequences, maxlen=max_sequence_length)
# Assemble the network as a single layer stack:
#   Embedding -> LSTM -> single sigmoid unit for binary classification.
# The embedding's input dimension is the vocabulary size plus one, because
# the word indices in `tokenizer.word_index` start at 1 and 0 is the value
# used for padding.
model = Sequential([
    Embedding(len(tokenizer.word_index) + 1, 32, input_length=max_sequence_length),
    LSTM(64),
    Dense(1, activation='sigmoid'),
])
# Binary cross-entropy matches the 0/1 labels and the sigmoid output.
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
# Train on the training set, one sample per gradient step, for 10 epochs.
model.fit(x=train_data, y=train_labels, epochs=10, batch_size=1)
# Evaluate on the held-out test set; with one compiled metric the result
# unpacks into the loss followed by the accuracy.
test_metrics = model.evaluate(x=test_data, y=test_labels)
loss, accuracy = test_metrics
print("Test Loss:", loss)
print("Test Accuracy:", accuracy)
更多推荐
所有评论(0)