master
aolingwen 6 years ago
parent c840fa81c9
commit 00bed3325a

Binary file not shown.

@ -1,45 +1,57 @@
from keras.preprocessing import sequence import pandas as pd
from keras.models import Sequential from sortedcontainers import SortedSet
from keras.layers import Dense, Embedding import numpy as np
from keras.layers import LSTM from sklearn.model_selection import train_test_split
from keras.datasets import imdb from keras.layers import Dense, Embedding, Input, Flatten
from keras.layers import LSTM, GRU, Dropout
max_features = 20000 from keras.models import Model
# cut texts after this number of words (among top max_features most common words) import keras
maxlen = 80 from keras.utils import plot_model
batch_size = 32 import utils
import time
print('Loading data...')
(x_train, y_train), (x_test, y_test) = imdb.load_data(num_words=max_features)
print(len(x_train), 'train sequences') def build_model(want_answer_size, infact_answer_size):
print(len(x_test), 'test sequences') inputs_want_answer = Input(shape=(want_answer_size, ), name='want_answer_input')
inputs_infact_answer = Input(shape=(infact_answer_size, ), name='infact_answer_input')
print('Pad sequences (samples x time)') x_1 = Embedding(want_answer_size, 128, name='want_answer_embedding', embeddings_initializer='he_normal', embeddings_regularizer=keras.regularizers.l2(0.01))(inputs_want_answer)
x_train = sequence.pad_sequences(x_train, maxlen=maxlen) x_2 = Embedding(infact_answer_size, 128, name='infact_answer_embedding', embeddings_initializer='he_normal', embeddings_regularizer=keras.regularizers.l2(0.01))(inputs_infact_answer)
x_test = sequence.pad_sequences(x_test, maxlen=maxlen) x_1 = GRU(128, dropout=0.4, return_sequences=True, recurrent_initializer='he_normal', recurrent_regularizer=keras.regularizers.l2(0.01))(x_1)
print('x_train shape:', x_train.shape) x_2 = GRU(128, dropout=0.4, return_sequences=True, recurrent_initializer='he_normal', recurrent_regularizer=keras.regularizers.l2(0.01))(x_2)
print('x_test shape:', x_test.shape) x = keras.layers.concatenate([x_1, x_2])
x = Flatten()(x)
x = Dropout(0.3)(x)
print('Build model...') x = Dense(64, activation='relu')(x)
model = Sequential() predictions = Dense(2, activation='softmax')(x)
model.add(Embedding(max_features, 128)) model = Model(inputs=[inputs_want_answer, inputs_infact_answer], outputs=predictions)
model.add(LSTM(128, dropout=0.2, recurrent_dropout=0.2)) return model
model.add(Dense(1, activation='sigmoid'))
# try using different optimizers and different optimizer configs if __name__ == '__main__':
model.compile(loss='binary_crossentropy', df = pd.read_excel('./预期输出与实际输出数据表.xlsx')
optimizer='adam', want_answer_corpus, infact_answer_corpus = utils.build_corpus(df)
onehot = utils.label2onehot(df['是否正确'])
x_train_1, x_test_1, y_train, y_test = train_test_split(want_answer_corpus, onehot, random_state=2333)
x_train_2, x_test_2, _, _ = train_test_split(infact_answer_corpus, onehot, random_state=2333)
want_answer_corpus_size = len(want_answer_corpus[0])
infact_answer_corpus_size = len(infact_answer_corpus[0])
model = build_model(want_answer_corpus_size, infact_answer_corpus_size)
# plot_model(model, to_file='model.png')
model.compile(loss='categorical_crossentropy',
optimizer=keras.optimizers.Adam(lr=1e-4),
metrics=['accuracy']) metrics=['accuracy'])
print(model.summary())
print('Train...') print('Train...')
model.fit(x_train, y_train, model.fit([x_train_1, x_train_2], y_train,
batch_size=batch_size, batch_size=16,
epochs=15, epochs=50)
validation_data=(x_test, y_test))
score, acc = model.evaluate(x_test, y_test, score, acc = model.evaluate([x_test_1, x_test_2], y_test,
batch_size=batch_size) batch_size=8, verbose=0)
print('Test score:', score) print('Test score:', score)
print('Test accuracy:', acc) print('Test accuracy:', acc)

@ -114,38 +114,7 @@ def label2onehot(label):
if __name__ == '__main__': if __name__ == '__main__':
df = pd.read_excel('./预期输出与实际输出数据表.xlsx') pass
want_answer_corpus, infact_answer_corpus = build_corpus(df)
onehot = label2onehot(df['是否正确'])
x_train_1, x_test_1, y_train, y_test = train_test_split(want_answer_corpus, onehot, random_state=2333)
x_train_2, x_test_2, _, _ = train_test_split(infact_answer_corpus, onehot, random_state=2333)
inputs_want_answer = Input(shape=(len(want_answer_corpus[0]), ), name='want_answer_input')
inputs_infact_answer = Input(shape=(len(infact_answer_corpus[0]), ), name='infact_answer_input')
x_1 = Embedding(len(want_answer_corpus[0]), 64, name='want_answer_embedding')(inputs_want_answer)
x_2 = Embedding(len(infact_answer_corpus[0]), 64, name='infact_answer_embedding')(inputs_infact_answer)
x_1 = GRU(64, dropout=0.5, return_sequences=0.2)(x_1)
x_2 = GRU(64, dropout=0.5, return_sequences=0.2)(x_2)
x = keras.layers.concatenate([x_1, x_2])
x = Flatten()(x)
x = Dense(64, activation='relu')(x)
predictions = Dense(2, activation='softmax')(x)
model = Model(inputs=[inputs_want_answer, inputs_infact_answer], outputs=predictions)
# plot_model(model, to_file='model.png')
model.compile(loss='categorical_crossentropy',
optimizer='adam',
metrics=['accuracy'])
print('Train...')
model.fit([x_train_1, x_train_2], y_train,
batch_size=16,
epochs=60)
score, acc = model.evaluate([x_test_1, x_test_2], y_test,
batch_size=8)
print('Test score:', score)
print('Test accuracy:', acc)

Loading…
Cancel
Save