The last deep learning framework you will ever need
Piotr Mazurek

You know what a "standard" DL training loop looks like
You (more or less) know how PyTorch works
Torch model
import torch.nn as nn
import torch.nn.functional as F

class Net(nn.Module):
    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(3, 6, 5)
        self.pool = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Conv2d(6, 16, 5)
        self.fc1 = nn.Linear(16 * 5 * 5, 120)
        self.fc2 = nn.Linear(120, 10)

    def forward(self, x):
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        x = x.view(-1, 16 * 5 * 5)
        x = F.relu(self.fc1(x))
        return self.fc2(x)
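The snippets below also assume the usual setup around the model. A minimal sketch, where the dataset, loss, and optimizer are my guesses (the net above matches the classic CIFAR-10 tutorial), not something the slides specify:

import torch
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
from torch.utils.data import DataLoader

# Assumed setup, not from the slides: CIFAR-10 data, cross-entropy loss, SGD
trainloader = DataLoader(
    torchvision.datasets.CIFAR10(root='./data', train=True, download=True,
                                 transform=transforms.ToTensor()),
    batch_size=32, shuffle=True, num_workers=2)

net = Net()
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.9)
EPOCHS = 10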
Torch training loop
for epoch in range(EPOCHS):
    for i, data in enumerate(trainloader, 0):
        inputs, labels = data

        optimizer.zero_grad()

        # forward + backward + optimize
        outputs = net(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # print statistics
        ...
I heard that the lr scheduler is a cool feature
Can we add that?
from torch.optim import lr_scheduler

scheduler = lr_scheduler.StepLR(optimizer, step_size=7, gamma=0.1)

for epoch in range(EPOCHS):
    for i, data in enumerate(trainloader, 0):
        inputs, labels = data

        optimizer.zero_grad()

        outputs = net(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

    scheduler.step()  # decay the learning rate once per epoch
Then gradient clipping joins in:

scheduler = lr_scheduler.StepLR(optimizer, step_size=7, gamma=0.1)

for epoch in range(EPOCHS):
    for i, data in enumerate(trainloader, 0):
        inputs, labels = data

        optimizer.zero_grad()

        outputs = net(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        torch.nn.utils.clip_grad_norm_(net.parameters(), args.clip)
        optimizer.step()

    scheduler.step()
And gradient accumulation:

scheduler = lr_scheduler.StepLR(optimizer, step_size=7, gamma=0.1)

for epoch in range(EPOCHS):
    optimizer.zero_grad()
    for i, data in enumerate(trainloader, 0):
        inputs, labels = data

        outputs = net(inputs)
        loss = criterion(outputs, labels) / accumulation_steps  # compute (scaled) loss
        loss.backward()

        if (i + 1) % accumulation_steps == 0:
            torch.nn.utils.clip_grad_norm_(net.parameters(), args.clip)
            optimizer.step()
            optimizer.zero_grad()

    scheduler.step()
And early stopping, plus TensorBoard logging:

from torch.utils.tensorboard import SummaryWriter
from suspicious_not_tested_github_repo import EarlyStopping

es = EarlyStopping(no_documentation_whatsoever=True)
scheduler = lr_scheduler.StepLR(optimizer, step_size=7, gamma=0.1)
writer = SummaryWriter()

for epoch in range(EPOCHS):
    optimizer.zero_grad()
    for i, data in enumerate(trainloader, 0):
        inputs, labels = data

        outputs = net(inputs)
        loss = criterion(outputs, labels) / accumulation_steps  # compute loss function

        if phase == "training":  # 'phase' flips between train/test elsewhere in the mess
            loss.backward()
            if (i + 1) % accumulation_steps == 0:
                torch.nn.utils.clip_grad_norm_(net.parameters(), args.clip)
                optimizer.step()
                optimizer.zero_grad()

    scheduler.step()

    if phase == "training":
        writer.add_scalar('Loss/train', loss, epoch)
    elif phase == "test":
        writer.add_scalar('Loss/test', loss, epoch)
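For the record, an EarlyStopping helper is small enough to write (and test) yourself. A minimal sketch of my own, not the mystery repo's actual code:

class EarlyStopping:
    """Stop training once the monitored loss stops improving."""
    def __init__(self, patience=5, min_delta=0.0):
        self.patience = patience
        self.min_delta = min_delta
        self.best_loss = float('inf')
        self.counter = 0

    def step(self, val_loss):
        """Call once per epoch; returns True when training should stop."""
        if val_loss < self.best_loss - self.min_delta:
            self.best_loss = val_loss
            self.counter = 0
        else:
            self.counter += 1
        return self.counter >= self.patience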
Or log to Weights & Biases instead:

for epoch in range(EPOCHS):
    ...
    if phase == "training":
        wandb.log({'epoch': epoch, 'train_loss': loss})
    elif phase == "test":
        wandb.log({'epoch': epoch, 'test_loss': loss})
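(wandb also wants a one-time init before any wandb.log call; the project name below is a placeholder of mine:)

import wandb

# hypothetical project name, pick your own
wandb.init(project="messy-training-loop")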
Oh, and somewhere along the way we've commented out line 5
for epoch in range(EPOCHS):
    loss = 0
    for i, data in enumerate(trainloader, 0):
        inputs, labels = data
        # optimizer.zero_grad()
        outputs = net(inputs)
        loss += criterion(outputs, labels)  # compute loss function
        if phase == "training":
            if (i + 1) % accumulation_steps == 0:
                loss = loss / accumulation_steps
                loss.backward()
                torch.nn.utils.clip_grad_norm_(net.parameters(), args.clip)
                optimizer.step()
                scheduler.step()
Nvidia gave us a GPU
Can we run this code using CUDA?
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
net = net.to(device)

for epoch in range(EPOCHS):
    for i, data in enumerate(trainloader, 0):
        inputs, labels = data
        inputs = inputs.to(device)
        labels = labels.to(device)

        optimizer.zero_grad()

        outputs = net(inputs)
        loss = criterion(outputs, labels)  # compute loss function
        ...
Alexa!
Find PyTorch DataParallel tutorial
*in English
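To be fair, the single-machine version is short. A sketch of the plain-PyTorch route (DistributedDataParallel, with its process groups and launch scripts, is the part the tutorials struggle to explain):

# DataParallel splits each batch across all visible GPUs and gathers the outputs
if torch.cuda.device_count() > 1:
    net = nn.DataParallel(net)
net = net.to(device)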
But it has all become a little bit messy
The solution: just use Lightning
Just kidding
Let's dive into Lightning
You do the cool stuff
Lightning takes care of the boilerplate
for epoch in range(EPOCHS):
    for i, data in enumerate(trainloader, 0):
        inputs, labels = data

        optimizer.zero_grad()

        outputs = net(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        ...
Lightning takes care of all this boilerplate
import pytorch_lightning as pl

class CustomClassifier(pl.LightningModule):
    def __init__(self):
        super().__init__()
        self.criterion = nn.CrossEntropyLoss()
        self.fc1 = nn.Linear(784, 10)

    def forward(self, x):
        return self.fc1(x)

    def training_step(self, batch, batch_idx):
        x, y = batch
        logits = self.forward(x)
        loss = self.criterion(logits, y)
        tensorboard_logs = {'train_loss': loss}
        return {'loss': loss, 'log': tensorboard_logs}

    def configure_optimizers(self):
        optimizer = torch.optim.Adam(self.parameters(), lr=1e-3)
        scheduler = lr_scheduler.StepLR(optimizer, step_size=7, gamma=0.1)
        return [optimizer], [scheduler]
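If you also want a validation loop, it is just two more hooks. A sketch in the same (older, pre-1.0-style) Lightning API as the slide above; the dict keys are my choice:

    # (additional methods inside CustomClassifier)
    def validation_step(self, batch, batch_idx):
        x, y = batch
        logits = self.forward(x)
        loss = self.criterion(logits, y)
        return {'val_loss': loss}

    def validation_epoch_end(self, outputs):
        avg_loss = torch.stack([o['val_loss'] for o in outputs]).mean()
        return {'val_loss': avg_loss, 'log': {'val_loss': avg_loss}}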
Actually, that is all you need to train the model
class CustomDatamodule(pl.LightningDataModule):
    def __init__(self, batch_size):
        super().__init__()
        self.batch_size = batch_size

    def setup(self, stage=None):
        # any regular torch Dataset works here
        self.train_set = StandardTorchDataset(train=True)
        self.val_set = StandardTorchDataset(train=False)

    def train_dataloader(self):
        return DataLoader(self.train_set,
                          batch_size=self.batch_size,
                          shuffle=True, num_workers=4)

    def val_dataloader(self):
        return DataLoader(self.val_set,
                          batch_size=self.batch_size,
                          shuffle=False, num_workers=4)
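(StandardTorchDataset is a stand-in for any torch Dataset. Given the 784-unit input of the classifier above, MNIST would be the natural guess; a hypothetical concrete choice, flattened because the model is a single Linear layer:)

import torchvision.transforms as transforms
from torchvision.datasets import MNIST

# flatten 1x28x28 images into 784-dim vectors for the Linear layer
flatten = transforms.Compose([
    transforms.ToTensor(),
    transforms.Lambda(lambda t: t.view(-1)),
])
train_set = MNIST('./data', train=True, download=True, transform=flatten)
val_set = MNIST('./data', train=False, download=True, transform=flatten)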
model = CustomClassifier()
dm = CustomDatamodule(batch_size=21)

trainer = pl.Trainer(max_epochs=37)  # the epoch count lives on the Trainer, not on fit()
trainer.fit(model, dm)
How can I train on multiple GPUs?
# dp = DataParallel
trainer = pl.Trainer(gpus=2, distributed_backend='dp')

# ddp = DistributedDataParallel
trainer = pl.Trainer(gpus=2, num_nodes=2, distributed_backend='ddp')

# ddp2 = DistributedDataParallel across nodes + dp within each node
trainer = pl.Trainer(gpus=2, num_nodes=2, distributed_backend='ddp2')

# (newer Lightning releases renamed this argument to 'strategy')
trainer = pl.Trainer(tpu_cores=8)
trainer.fit(model, dm)
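And all the tricks from the messy loop earlier are one-liner Trainer arguments. A sketch with Lightning 1.x-style argument names; the specific values are placeholders:

from pytorch_lightning.callbacks import EarlyStopping

trainer = pl.Trainer(
    max_epochs=37,
    gradient_clip_val=0.5,                          # gradient clipping
    accumulate_grad_batches=4,                      # gradient accumulation
    callbacks=[EarlyStopping(monitor='val_loss')],  # early stopping, documented this time
)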
Check out the official documentation at: https://pytorch-lightning.readthedocs.io/en/stable/
A universal template for designing training loops
Advanced DL tricks out of the box
You care about the cool stuff, Lightning handles everything else
Feel free to ask any questions
Piotr Mazurek
Presentation available at:
tugot17.github.io/Pytorch-Lightning-Presentation