This repository was archived by the owner on Feb 12, 2022. It is now read-only.
Changes from all commits (47 commits)
4f3898d
Add data and corpuses in the gitignore
ngarneau Dec 20, 2018
8b12a9a
Re organize main file
ngarneau Dec 20, 2018
c4872fc
Pytoune experiments import
ngarneau Dec 20, 2018
57e3b8c
Removing prints
ngarneau Dec 20, 2018
a183e58
Adding alpha and betas into the model for the criterion
ngarneau Dec 20, 2018
a21cbc7
Cleaning the model and adding the criterion within it
ngarneau Dec 20, 2018
d9a4d81
Changing default dir name
ngarneau Dec 20, 2018
6929f6b
Using pytoune for the training loop and several callbacks
ngarneau Dec 22, 2018
c39ba2b
Creating a sentence loader for pytoune
ngarneau Dec 22, 2018
5d214c3
Slight change for parameter passing
ngarneau Dec 22, 2018
5dbace3
Remove print and useless methods
ngarneau Dec 22, 2018
baf7bf5
Adding some metrics during training
ngarneau Dec 22, 2018
e4d541c
Fixing the way we compute the loss on validation and the accuracy
ngarneau Dec 23, 2018
7c3704c
Fixing tmp variable name
ngarneau Dec 23, 2018
fd6061f
Properly compute number of batches
ngarneau Dec 23, 2018
428f18c
Fixing the dataset loader and print when switching to ASGD
ngarneau Dec 23, 2018
6375cc9
Using same batch size for valid and test, quick fix
ngarneau Dec 23, 2018
d6e9f29
Merge branch 'training-loop-cleaning' of https://github.com/ngarneau/…
ngarneau Dec 23, 2018
f8f81d0
Check if parameter exist in tmp var before reading it
ngarneau Dec 23, 2018
c490940
Merge branch 'training-loop-cleaning' of github.com:ngarneau/awd-lstm…
ngarneau Dec 23, 2018
7d4ff11
Fix the evaluation callback
ngarneau Dec 24, 2018
f5b5bb6
make old main work again
ngarneau Dec 26, 2018
d6407b2
Reduce LR on plateau callback
ngarneau Dec 26, 2018
001f9cc
Merge branch 'training-loop-cleaning' of github.com:ngarneau/awd-lstm…
ngarneau Dec 26, 2018
7f48b6a
Moving callbacks into seperate module
ngarneau Dec 26, 2018
f121619
Monitor validation loss
ngarneau Dec 26, 2018
01ac056
Document callbacks
ngarneau Dec 26, 2018
6ccae4e
Missing imports
ngarneau Dec 26, 2018
a715d47
Merge branch 'training-loop-cleaning' of github.com:ngarneau/awd-lstm…
ngarneau Dec 27, 2018
2c5ffe6
Properly name experiment with dataset and model name
ngarneau Dec 27, 2018
c8a6074
Temp fix for the batch size
ngarneau Dec 27, 2018
1010528
Change eval batch size
ngarneau Dec 27, 2018
6fb9872
Do not randomize when in eval mode
ngarneau Dec 27, 2018
e68066d
Dynamic handling of batch size on eval does not work
ngarneau Dec 27, 2018
371a6f2
Merge branch 'training-loop-cleaning' of https://github.com/ngarneau/…
ngarneau Dec 27, 2018
2e3cc6c
Wrangling batch size and randomness to reproduce exact results...
ngarneau Dec 27, 2018
e2c2ed3
Make main work back again on branch
ngarneau Dec 27, 2018
55a7c2a
Make sure we restart at same seq len
ngarneau Dec 29, 2018
6ecc6c0
Properly set seed in training loop
ngarneau Dec 29, 2018
3ef6b35
Patch to reset hidden state on evaluation
ngarneau Dec 29, 2018
a35243b
Fix evaluation and model saving
ngarneau Dec 29, 2018
a569717
Allow keyboard interrupt then evaluate on test
ngarneau Dec 30, 2018
bd1ad2b
Provide batch size to init hidden state
ngarneau Dec 30, 2018
51e8098
Add requirements file
ngarneau Dec 30, 2018
480319e
Merge branch 'training-loop-cleaning'
ngarneau Dec 30, 2018
7175311
Use pytoune main and model as the real main and model now
ngarneau Dec 30, 2018
4ffb50c
Use model instead of pytoune model for the RNNModel
ngarneau Jan 8, 2019
4 changes: 4 additions & 0 deletions .gitignore
@@ -1,2 +1,6 @@
*.pt
__pycache__/

corpus.*
data/
results/
116 changes: 116 additions & 0 deletions callbacks.py
@@ -0,0 +1,116 @@
import torch
import logging
from pytoune.framework import Callback

class HiddenInitCallback(Callback):
"""
This callback is used to reset the hidden state at each epoch.
"""
def __init__(self, train_batch_size, eval_batch_size):
self.train_batch_size = train_batch_size
self.eval_batch_size = eval_batch_size

def on_epoch_begin(self, epoch, logs):
self.model.model.init_hidden(self.train_batch_size)



class HiddenRepackagingCallback(Callback):
"""
    At the start of each batch, detach ("repackage") the hidden state so that
    gradients do not backpropagate beyond the current BPTT window.
"""
def on_batch_begin(self, batch, logs):
self.model.model.repackage_hidden()


class AdaptativeLRSchedulerCallback(Callback):
"""
This callback computes a learning rate based on the sequence length and the bptt.
"""
def __init__(self, loader):
self.loader = loader
self.lr2 = None

def on_batch_begin(self, batch, logs):
self.lr2 = self.model.optimizer.param_groups[0]['lr']
effective_learning_rate = self.lr2 * self.loader.seq_len / self.loader.bptt
self.model.optimizer.param_groups[0]['lr'] = effective_learning_rate

def on_batch_end(self, batch, logs):
self.model.optimizer.param_groups[0]['lr'] = self.lr2


class EvaluationCallback(Callback):
"""
This callback copies some params before the epoch begins.
It was not that clear why we did this in the original code, may need further documentation.
"""
def __init__(self, ):
self.tmp = {}

def on_epoch_begin(self, epoch, logs):
if 't0' in self.model.optimizer.param_groups[0]: # Check if we are in ASGD
for prm in self.model.model.parameters():
if prm in self.tmp:
prm.data = self.tmp[prm].clone()

def on_epoch_end(self, epoch, logs):
if 't0' in self.model.optimizer.param_groups[0]: # Check if we are in ASGD
for prm in self.model.model.parameters():
self.tmp[prm] = prm.data.clone()
prm.data = self.model.optimizer.state[prm]['ax'].clone()


class ASGDOptimizerSwitchCallback(Callback):
"""
This callback triggers the change of the SGD optimizer to ASGD,
given we are using SGD (not Adam) and the validation loss is non mono.
"""
def __init__(self, args):
self.args = args
self.val_losses = list()

def can_switch_to_asgd(self):
return self.args.optimizer == 'sgd' and 't0' not in self.model.optimizer.param_groups[0]

def is_non_mono(self, val_loss):
return len(self.val_losses) > self.args.nonmono and val_loss > min(self.val_losses[:-self.args.nonmono])

def on_epoch_end(self, epoch, logs):
val_loss = logs['val_loss']
if self.can_switch_to_asgd() and self.is_non_mono(val_loss):
logging.info("Switching to ASGD")
self.model.optimizer = torch.optim.ASGD(
self.model.model.parameters(),
lr=self.args.lr,
t0=0,
lambd=0.,
weight_decay=self.args.wdecay
)
self.val_losses.append(val_loss)


class MetricsCallback(Callback):
"""
This callback logs a bunch of information about parameters, gradients etc.
As suggested by https://github.com/allenai/writing-code-for-nlp-research-emnlp2018/blob/master/writing_code_for_nlp_research.pdf
"""
def __init__(self, logger):
super(MetricsCallback, self).__init__()
self.logger = logger

    def on_backward_end(self, batch):
        # Log gradient statistics (skip parameters that received no gradient).
        for parameter, values in self.model.model.named_parameters():
            if values.grad is not None:
                self.logger.log_scalar("{}.grad.mean".format(parameter), float(values.grad.mean()))
                self.logger.log_scalar("{}.grad.std".format(parameter), float(values.grad.std()))

def on_batch_end(self, batch, logs):
self.logger.log_scalar("steps.train.loss", logs['loss'])
self.logger.log_scalar("steps.train.acc", logs['acc'])

def on_epoch_end(self, epoch, logs):
self.logger.log_scalar("epochs.train.loss", logs['loss'])
self.logger.log_scalar("epochs.train.acc", logs['acc'])
self.logger.log_scalar("epochs.val.loss", logs['val_loss'])
self.logger.log_scalar("epochs.val.acc", logs['val_acc'])
24 changes: 24 additions & 0 deletions configs/base.json
@@ -0,0 +1,24 @@
{
"data": "./data/penn",
"model": "LSTM",
"emsize": 400,
"nhid": 1150,
"nlayers": 3,
"lr": 30,
"clip": 0.25,
"epochs": 8000,
"batch_size": 80,
"bptt": 70,
"dropout": 0.4,
"dropouth": 0.3,
"dropouti": 0.65,
"dropoute": 0.1,
"wdrop": 0.5,
"seed": 1111,
"nonmono": 5,
"log-interval": 200,
"alpha": 2,
"beta": 1,
"wdecay": 1.2e-6,
"optimizer": "sgd"
}
8 changes: 8 additions & 0 deletions configs/penn.json
@@ -0,0 +1,8 @@
{
"batch_size": 20,
"data": "./data/penn",
"dropouti": 0.4,
"dropouth": 0.25,
"seed": 141,
"epoch": 500
}
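
configs/penn.json only repeats the keys it overrides, which suggests that a dataset-specific config is layered on top of configs/base.json. Below is a minimal sketch of such a merge; the actual config-loading code is not shown in this diff, so the function is illustrative only.

# Hypothetical config-merge sketch -- not part of this pull request's diff.
import json


def load_config(base_path="configs/base.json", override_path="configs/penn.json"):
    with open(base_path) as f:
        config = json.load(f)
    with open(override_path) as f:
        config.update(json.load(f))  # dataset-specific values win over base defaults
    return config


cfg = load_config()
print(cfg["batch_size"], cfg["dropouti"])  # -> 20 0.4 with the two files above
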
32 changes: 31 additions & 1 deletion data.py
@@ -1,7 +1,12 @@
import math
import os
from collections import Counter

import numpy as np
import torch
from torch.utils.data import DataLoader

from utils import get_batch


class Dictionary(object):
@@ -54,3 +59,28 @@ def tokenize(self, path):
token += 1

return ids


class SentenceLoader:
def __init__(self, dataset, bptt, train_mode=True):
self.bptt = bptt
self.seq_len = bptt
self.init_seq_len = bptt
self.dataset = dataset
self.train_mode = train_mode

    def __iter__(self):
        i = 0
        self.seq_len = self.init_seq_len
        while i < self.dataset.size(0) - 1 - 1:
            if self.train_mode:
                # Variable-length BPTT: occasionally halve the base window, then
                # sample the actual sequence length around it (minimum of 5).
                bptt = self.bptt if np.random.random() < 0.95 else self.bptt / 2.
                self.seq_len = max(5, int(np.random.normal(bptt, 5)))
                batch = get_batch(self.dataset, i, bptt, self.seq_len)
            else:
                # Evaluation: keep a fixed window so results are reproducible.
                batch = get_batch(self.dataset, i, self.bptt, self.seq_len)
            i += self.seq_len
            yield batch

def __len__(self):
return len(self.dataset) // self.bptt
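
A usage sketch for SentenceLoader follows. Corpus and batchify are assumed to follow the upstream AWD-LSTM conventions and are not part of this diff; the sketch also assumes get_batch returns an (input, target) pair.

# Hypothetical usage sketch -- only SentenceLoader is introduced by this change.
from data import Corpus, SentenceLoader


def batchify(data, batch_size):
    # Trim the flat token stream so it divides evenly, then reshape to
    # (num_steps, batch_size) so that consecutive rows are consecutive tokens.
    nbatch = data.size(0) // batch_size
    data = data.narrow(0, 0, nbatch * batch_size)
    return data.view(batch_size, -1).t().contiguous()


corpus = Corpus("./data/penn")             # "data" path from configs/base.json
train_data = batchify(corpus.train, 80)    # "batch_size" from configs/base.json
train_loader = SentenceLoader(train_data, bptt=70, train_mode=True)

for data, targets in train_loader:
    # data: (seq_len, batch_size) token ids; targets: the flattened next-token ids.
    # seq_len varies around bptt in train_mode, as in the AWD-LSTM paper.
    pass
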