import numpy as np
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, LSTM, Dense
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences

# Toy sentiment-classification script: tokenizes four short sentences, trains
# a small Embedding + LSTM binary classifier on the first three, and evaluates
# it on the last one.

# Input text data
texts = [
    "This is a good movie",
    "This is a bad movie",
    "The plot was exciting",
    "The plot was boring"
]

# Label data, aligned with `texts` by position
labels = np.array([1, 0, 1, 0])

# Split into training and test sets (first three samples train, last one tests)
train_texts = texts[:3]
train_labels = labels[:3]
test_texts = texts[3:]
test_labels = labels[3:]

# Build the vocabulary from the training texts only and convert texts to
# integer sequences. The explicit OOV token makes words that were not seen
# while fitting (e.g. "boring" in the test sentence) map to a dedicated index
# instead of being silently dropped from the sequence.
tokenizer = Tokenizer(oov_token="<OOV>")
tokenizer.fit_on_texts(train_texts)

train_sequences = tokenizer.texts_to_sequences(train_texts)
test_sequences = tokenizer.texts_to_sequences(test_texts)

# Pad the sequences so every input has the same shape; the target length is
# the longest training sequence.
max_sequence_length = max(len(sequence) for sequence in train_sequences)
train_data = pad_sequences(train_sequences, maxlen=max_sequence_length)
test_data = pad_sequences(test_sequences, maxlen=max_sequence_length)

# Build the model.
# Tokenizer indices start at 1 and pad_sequences fills with 0 by default, so
# the embedding table needs one more row than there are entries in word_index
# (which already includes the OOV token).
vocab_size = len(tokenizer.word_index) + 1

model = Sequential()
# `input_length` is intentionally not passed: it is deprecated in Keras 3 and
# is not needed when the embedding feeds a recurrent layer.
model.add(Embedding(vocab_size, 32))
model.add(LSTM(64))
model.add(Dense(1, activation='sigmoid'))

model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

# Train the model
model.fit(train_data, train_labels, epochs=10, batch_size=1)

# Evaluate the model on the test set
loss, accuracy = model.evaluate(test_data, test_labels)
print("Test Loss:", loss)
print("Test Accuracy:", accuracy)
 

# Logo
#
# 技术共进,成长同行——讯飞AI开发者社区
#
# 更多推荐