import numpy as np
import pandas as pd
from sklearn.feature_extraction.text import CountVectorizer

from keras.preprocessing.text import Tokenizer
from keras.preprocessing.sequence import pad_sequences
from keras.models import Sequential
from keras.layers import Dense, Embedding, LSTM, SpatialDropout1D
from sklearn.model_selection import train_test_split
from keras.utils.np_utils import to_categorical
import re


from sklearn.metrics import classification_report
from sklearn.metrics import accuracy_score

# Output: Using TensorFlow backend.

# Load the review dataset from a CSV file resolved relative to the current
# working directory. Later cells read its 'review' and 'sentiment' columns.
df = pd.read_csv("movie_data.csv")

# Notebook-style inspection: a bare expression is only rendered when it is the
# last statement of a notebook cell; run as a plain script it has no effect.
df.head()

df.shape

# Output: (50000, 2)

# Inspect the distinct values present in the 'sentiment' label column
# (notebook-style bare expression; only displayed inside a notebook cell).
df["sentiment"].unique()

# Output: array([1, 0], dtype=int64)

# --- Text preprocessing ------------------------------------------------------
# Lower-case the raw review text in place before tokenising.
df["review"] = df["review"].str.lower()

# Vocabulary size handed to both the tokenizer (num_words) and the Embedding
# layer (input dimension), so the two stay in sync.
max_features = 2000
max_fatures = max_features  # original misspelled name, kept as an alias
tokenizer = Tokenizer(num_words=max_features, split=' ')
tokenizer.fit_on_texts(df['review'].values)
X = tokenizer.texts_to_sequences(df['review'].values)
# No maxlen is passed, so the padded length is whatever pad_sequences derives
# from the data; X.shape[1] is fed to the Embedding layer below.
# NOTE(review): an explicit maxlen would shorten the sequences and probably
# the training time, at the cost of truncating long reviews -- confirm before
# changing, since it alters the trained model.
X = pad_sequences(X)

# --- Model definition --------------------------------------------------------
embed_dim = 128  # size of each word-embedding vector
lstm_out = 196   # number of LSTM units

model = Sequential()
model.add(Embedding(max_features, embed_dim, input_length=X.shape[1]))
model.add(SpatialDropout1D(0.4))
model.add(LSTM(lstm_out, dropout=0.2, recurrent_dropout=0.2))
# Two softmax outputs, trained with categorical cross-entropy.
model.add(Dense(2, activation='softmax'))
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
# summary() prints the table itself and returns None, so wrapping it in
# print() emitted a stray "None" line after the table.
model.summary()

# Output:
# Model: "sequential_1"
# _________________________________________________________________
# Layer (type)                 Output Shape              Param #
# =================================================================
# embedding_1 (Embedding)      (None, 1939, 128)         256000
# _________________________________________________________________
# spatial_dropout1d_1 (Spatial (None, 1939, 128)         0
# _________________________________________________________________
# lstm_1 (LSTM)                (None, 196)               254800
# _________________________________________________________________
# dense_1 (Dense)              (None, 2)                 394
# =================================================================
# Total params: 511,194
# Trainable params: 511,194
# Non-trainable params: 0
# _________________________________________________________________
# None

# One-hot encode the sentiment labels: one indicator column per distinct value.
y = pd.get_dummies(df['sentiment']).values

# Hold out 33% of the samples for evaluation; the fixed random_state keeps the
# split reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.33,
    random_state=42,
)

# Show the (inputs, labels) shapes of the training and test partitions.
for split_X, split_y in ((X_train, y_train), (X_test, y_test)):
    print(split_X.shape, split_y.shape)

# Output:
# (33500, 1939) (33500, 2)
# (16500, 1939) (16500, 2)

# Mini-batch size; the prediction step further down reuses this value.
batch_size = 32
# Fit on the training partition for 7 epochs. As the final expression of a
# notebook cell, the returned History object is displayed.
model.fit(
    X_train,
    y_train,
    batch_size=batch_size,
    epochs=7,
    verbose=2,
)

# Output:
# Epoch 1/7
#  - 7771s - loss: 0.4913 - accuracy: 0.7632
# Epoch 2/7
#  - 6812s - loss: 0.3962 - accuracy: 0.8316
# Epoch 3/7
#  - 7125s - loss: 0.4015 - accuracy: 0.8235
# Epoch 4/7
#  - 8689s - loss: 0.3461 - accuracy: 0.8487
# Epoch 5/7
#  - 6405s - loss: 0.2686 - accuracy: 0.8899
# Epoch 6/7
#  - 6377s - loss: 0.2399 - accuracy: 0.9034
# Epoch 7/7
#  - 6396s - loss: 0.2243 - accuracy: 0.9094
#
# <keras.callbacks.callbacks.History at 0x207708d89e8>

# Per-class scores from the softmax output layer for every test sample.
y_pred = model.predict(X_test, verbose=2, batch_size=batch_size)

# Collapse the score rows and the one-hot label rows to class indices in one
# vectorised call. With axis=1, accidentally re-running this cell on the
# already-collapsed 1-D labels raises an error; the previous per-row loop
# silently turned every label into 0 in that case.
y_pred = np.argmax(y_pred, axis=1)
y_test = np.argmax(y_test, axis=1)

report = classification_report(y_test, y_pred, output_dict=True)
# Keep the report in its own variable so the `df` holding the review dataset
# is not overwritten.
report_df = pd.DataFrame(report).transpose()
print(report_df.to_latex())

# Output:
# \begin{tabular}{lrrrr}
# \toprule
# {} &  f1-score &  precision &    recall &  support \\
# \midrule
# 0            &  0.890747 &   0.897757 &  0.883846 &   8196.0 \\
# 1            &  0.893815 &   0.887083 &  0.900650 &   8304.0 \\
# micro avg    &  0.892303 &   0.892303 &  0.892303 &  16500.0 \\
# macro avg    &  0.892281 &   0.892420 &  0.892248 &  16500.0 \\
# weighted avg &  0.892291 &   0.892385 &  0.892303 &  16500.0 \\
# \bottomrule
# \end{tabular}

# Overall accuracy of the predicted class indices against the true ones.
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)

# Output: Accuracy: 0.8923030303030303

from keras.models import load_model

# Persist the trained model to a .h5 file in the current working directory.
model_path = 'my_model.h5'
model.save(model_path)

# Output (DataFrame preview with columns 'review' and 'sentiment'):
# 	review	sentiment
# 0	I went and saw this movie last night after bei...	1
# 1	Actor turned director Bill Paxton follows up h...	1
# 2	As a recreational golfer with some knowledge o...	1
# 3	I saw this film in a sneak preview, and it is ...	1
# 4	Bill Paxton has taken the true story of the 19...	1