In this work we build a sentiment analysis model that combines a pretrained BERT encoder with a GRU classifier head, trained on TripAdvisor reviews, in order to predict whether an opinion is positive or negative.
BERT (Bidirectional Encoder Representations from Transformers) is a pretrained transformer-based model that takes the context of each word into account. A GRU layer is used here instead of an LSTM.
Import PyTorch and torchtext (note that in newer torchtext releases the Field/TabularDataset API used here lives under torchtext.legacy)
import torch
from torchtext import data
from torchtext import datasets
SEED = 1234
torch.manual_seed(SEED)
torch.backends.cudnn.deterministic = True
Setting up training¶
BATCH_SIZE = 16
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
Load BERT model and tokenizer
!pip install transformers
from transformers import BertModel, BertTokenizer
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
bert = BertModel.from_pretrained('bert-base-uncased')
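As a quick sanity check (with an arbitrary example sentence), we can see how the tokenizer splits text into WordPiece subwords and maps them to vocabulary ids:
# Tokenize a sample sentence and look up the corresponding vocabulary ids.
tokens = tokenizer.tokenize('This hotel was surprisingly good!')
print(tokens)
print(tokenizer.convert_tokens_to_ids(tokens))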
Accuracy function
def binary_accuracy(preds, y):
    # Round the sigmoid outputs to 0 or 1 and compare them with the true labels.
    rounded_preds = torch.round(torch.sigmoid(preds))
    correct = (rounded_preds == y).float()
    acc = correct.sum() / len(correct)
    return acc
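For example, with toy logits and labels (made-up values, not from the dataset):
# The first two logits round to the correct classes, the third does not -> 2/3 accuracy.
print(binary_accuracy(torch.tensor([2.0, -1.0, 0.5]), torch.tensor([1.0, 0.0, 0.0])))  # tensor(0.6667)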
# Convert the label categories to floats: 'POS' -> 1.0, anything else -> 0.0.
def toFloat(x):
    return 1.0 if x == 'POS' else 0.0
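A quick check of the mapping ('NEG', like any non-'POS' value, maps to 0.0):
print(toFloat('POS'), toFloat('NEG'))  # 1.0 0.0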
def tokenize_and_cut(sentence):
    # Truncate to BERT's maximum input length (512 for bert-base-uncased),
    # reserving two positions for the [CLS] and [SEP] tokens added by the Field below.
    maxlen = tokenizer.max_model_input_sizes['bert-base-uncased']
    tokens = tokenizer.tokenize(sentence)
    tokens = tokens[:maxlen - 2]
    return tokens
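Applied to a sample sentence (an arbitrary example, just to illustrate the output):
print(tokenize_and_cut('The room was clean and the staff were friendly.'))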
# The text field delegates tokenization and numericalization to the BERT tokenizer,
# so no torchtext vocabulary is built (use_vocab = False).
TEXT = data.Field(batch_first = True,
                  use_vocab = False,
                  tokenize = tokenize_and_cut,
                  preprocessing = tokenizer.convert_tokens_to_ids,
                  init_token = tokenizer.cls_token_id,
                  eos_token = tokenizer.sep_token_id,
                  pad_token = tokenizer.pad_token_id,
                  unk_token = tokenizer.unk_token_id)
# The label field maps the 'POS'/'NEG' strings to 1.0/0.0 floats.
LABELS = data.LabelField(dtype = torch.float, preprocessing = data.Pipeline(lambda x: toFloat(x)))
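For reference, the ids of the special tokens the TEXT field inserts (for bert-base-uncased these are 101, 102, 0 and 100):
print(tokenizer.cls_token_id, tokenizer.sep_token_id, tokenizer.pad_token_id, tokenizer.unk_token_id)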
import random
# CSV columns: only 'Short' (the review text) and 'Opinion' (the label) are used;
# the 'Long' and 'Class' columns are ignored (their fields are set to None).
fields = [('Short', TEXT), ('Long', None), ('Class', None), ('Opinion', LABELS)]
train_data = data.TabularDataset(path = basepath + '/tripadvisor_data.csv',
                                 format = 'csv',
                                 fields = fields,
                                 skip_header = True)
train_data, valid_data, test_data = train_data.split(split_ratio = [0.6, 0.2, 0.2], random_state = random.seed(SEED))
print(vars(train_data.examples[0]))
Building the vocabulary
We build the vocabulary for the Opinion (label) field.
LABELS.build_vocab(train_data)
LABELS.vocab.freqs
# Increase the batch size for the iterators (this overrides the value of 16 set earlier).
BATCH_SIZE = 64
train_iterator, valid_iterator, test_iterator = data.BucketIterator.splits(
(train_data, valid_data, test_data),
batch_size = BATCH_SIZE,
sort_key = lambda x: len(x.Short),
sort_within_batch = False,
device = device)
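To verify the pipeline, we can peek at one batch (a quick sanity check; the sequence length depends on the longest review in the batch):
batch = next(iter(train_iterator))
print(batch.Short.shape)    # [batch_size, seq_len]
print(batch.Opinion.shape)  # [batch_size]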
Model definition¶
import torch.nn as nn
class BERTGRUSentiment(nn.Module):
def __init__(self,
bert,
hidden_dim,
output_dim,
n_layers,
bidirectional,
dropout):
super().__init__()
self.bert = bert
        embedding_dim = bert.config.hidden_size  # 768 for bert-base-uncased
self.rnn = nn.GRU(embedding_dim,
hidden_dim,
num_layers = n_layers,
bidirectional = bidirectional,
batch_first = True,
dropout = 0 if n_layers < 2 else dropout)
self.out = nn.Linear(hidden_dim * 2 if bidirectional else hidden_dim, output_dim)
self.dropout = nn.Dropout(dropout)
    def forward(self, text):
        # text: [batch_size, seq_len]
        with torch.no_grad():  # BERT is used as a frozen feature extractor; no gradients flow into it.
            embedded = self.bert(text)[0]
        # embedded: [batch_size, seq_len, emb_dim]
        _, hidden = self.rnn(embedded)
        # hidden: [n_layers * n_directions, batch_size, hidden_dim]
        if self.rnn.bidirectional:
            # Concatenate the final forward and backward hidden states.
            hidden = self.dropout(torch.cat((hidden[-2,:,:], hidden[-1,:,:]), dim = 1))
        else:
            hidden = self.dropout(hidden[-1,:,:])
        output = self.out(hidden)
        # output: [batch_size, output_dim]
        return output
HIDDEN_DIM = 256
OUTPUT_DIM = 1
N_LAYERS = 2
BIDIRECTIONAL = True
DROPOUT = 0.25
model = BERTGRUSentiment(bert,
HIDDEN_DIM,
OUTPUT_DIM,
N_LAYERS,
BIDIRECTIONAL,
DROPOUT)
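A quick smoke test with random token ids (an arbitrary dummy batch, only to confirm the output shape):
dummy = torch.randint(0, tokenizer.vocab_size, (2, 10))  # batch of 2 sequences of 10 token ids
print(model(dummy).shape)  # torch.Size([2, 1])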
#Number of parameters
def count_parameters(model):
return sum(p.numel() for p in model.parameters() if p.requires_grad)
print(f'The model has {count_parameters(model):,} trainable parameters')
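Most of these parameters belong to BERT itself. Since BERT only acts as a frozen feature extractor here (its forward pass runs under torch.no_grad()), we can optionally mark its weights as non-trainable so the count reflects what is actually learned; a minimal sketch:
# Freeze all parameters whose names start with 'bert' (the GRU and the linear head stay trainable).
for name, param in model.named_parameters():
    if name.startswith('bert'):
        param.requires_grad = False
print(f'The model has {count_parameters(model):,} trainable parameters after freezing BERT')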
Training setup¶
Defining the criterion and the optimizer
import torch.optim as optim
criterion = nn.BCEWithLogitsLoss()
optimizer = optim.Adam(model.parameters())
model = model.to(device)
criterion = criterion.to(device)
We define the train and evaluate functions
def train(model, iterator, optimizer, criterion):
epoch_loss = 0
epoch_acc = 0
model.train()
for batch in iterator:
optimizer.zero_grad()
predictions = model(batch.Short).squeeze(1)
loss = criterion(predictions, batch.Opinion)
acc = binary_accuracy(predictions, batch.Opinion)
loss.backward()
optimizer.step()
epoch_loss += loss.item()
epoch_acc += acc.item()
return epoch_loss / len(iterator), epoch_acc / len(iterator)
def evaluate(model, iterator, criterion):
epoch_loss = 0
epoch_acc = 0
model.eval()
with torch.no_grad():
for batch in iterator:
predictions = model(batch.Short).squeeze(1)
loss = criterion(predictions, batch.Opinion)
acc = binary_accuracy(predictions, batch.Opinion)
epoch_loss += loss.item()
epoch_acc += acc.item()
return epoch_loss / len(iterator), epoch_acc / len(iterator)
def epoch_time(start_time, end_time):
elapsed_time = end_time - start_time
elapsed_mins = int(elapsed_time / 60)
elapsed_secs = int(elapsed_time - (elapsed_mins * 60))
return elapsed_mins, elapsed_secs
Training¶
import time
N_EPOCHS = 5
best_valid_loss = float('inf')
for epoch in range(N_EPOCHS):
start_time = time.time()
train_loss, train_acc = train(model, train_iterator, optimizer, criterion)
valid_loss, valid_acc = evaluate(model, valid_iterator, criterion)
end_time = time.time()
epoch_mins, epoch_secs = epoch_time(start_time, end_time)
if valid_loss < best_valid_loss:
best_valid_loss = valid_loss
torch.save(model.state_dict(), 'tut6-model.pt')
print(f'Epoch: {epoch+1:02} | Epoch Time: {epoch_mins}m {epoch_secs}s')
print(f'\tTrain Loss: {train_loss:.3f} | Train Acc: {train_acc*100:.2f}%')
print(f'\t Val. Loss: {valid_loss:.3f} | Val. Acc: {valid_acc*100:.2f}%')
Test¶
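The training loop above saves the best checkpoint but never restores it, so we first reload the weights from the best epoch:
model.load_state_dict(torch.load('tut6-model.pt'))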
# Return the model's rounded predictions together with the true labels.
def get_predictions(model, iterator):
    predictions_all = []
    labels_all = []
    model.eval()  # disable dropout (and batch normalization, if present)
    with torch.no_grad():  # no gradients are needed during evaluation
        for batch in iterator:
            predictions = model(batch.Short).squeeze(1)
            predictions_all += torch.round(torch.sigmoid(predictions)).flatten().cpu().numpy().tolist()
            labels_all += batch.Opinion.flatten().cpu().numpy().tolist()
    return predictions_all, labels_all
predictions, labels = get_predictions(model, test_iterator)
from sklearn import metrics
tn, fp, fn, tp = metrics.confusion_matrix(labels, predictions).ravel()
(tn, fp, fn, tp)
accuracy = (tp + tn) / (tp + tn + fp + fn)
precision = tp / (tp + fp)
recall = tp / (tp + fn)
F1 = 2 * precision * recall / (precision + recall)  # harmonic mean of precision and recall
print('Accuracy: ' + str(accuracy) + ' ; Precision: ' + str(precision) + ' ; Recall: ' + str(recall) + ' ; F1: ' + str(F1))
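These figures can be cross-checked with scikit-learn's built-in report (label 0.0 corresponds to the non-positive class, named NEG here per the toFloat mapping):
print(metrics.classification_report(labels, predictions, target_names=['NEG', 'POS']))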
Discussion¶
The accuracy of the model is good, although the recall and F1 scores are worse than those of other models. We have to take into account that the model is not tuned, so training for more epochs and fine-tuning it should improve the results.