PL-1

PyTorch Lightning

Basic Structure

First, subclass LightningModule and define the model's building blocks in __init__. The method def training_step(self, batch, batch_idx): implements the forward pass for one training step, computes the loss, and logs it to TensorBoard. Finally, define the optimizer in def configure_optimizers(self): and the basic training setup is complete.

import os

import pytorch_lightning as pl
import torch
import torch.nn as nn
from pytorch_lightning import LightningModule
from torch.utils.data import DataLoader
from torchvision import transforms
from torchvision.datasets import MNIST


class LitMNIST(LightningModule):
    def __init__(self):
        super().__init__()
        # basic building blocks of the model (layer definitions elided)
        self.conv1 = torch.nn.Sequential(...)
        self.dense = torch.nn.Sequential(...)

    def forward(self, x):
        x = self.conv1(x)
        ...
        x = self.dense(x)
        return x

    def training_step(self, batch, batch_idx):
        x, y = batch
        out = self.forward(x)
        criterion = nn.CrossEntropyLoss()
        loss = criterion(out, y)
        self.log('train loss', loss, on_step=True, on_epoch=True, prog_bar=True)
        return loss

    def configure_optimizers(self):
        return torch.optim.Adam(self.parameters(), lr=0.02)


if __name__ == '__main__':
    transform = transforms.Compose([transforms.ToTensor(),
                                    transforms.Normalize((0.1307,), (0.3081,))])

    train_loader = DataLoader(MNIST(os.getcwd(), train=True,
                                    download=True,
                                    transform=transform),  # use the composed transform defined above
                              batch_size=32)

    model = LitMNIST()
    trainer = pl.Trainer(gpus=[0], max_epochs=2)
    trainer.fit(model, train_loader)
    torch.save(model.state_dict(), 'm.pt')
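
Since only the state_dict is saved above, inference later means rebuilding the module first and then loading the weights into it. A minimal sketch, assuming the m.pt file written by the training script and that the elided layer definitions above have been filled in:

# rebuild the architecture, then load the saved parameters into it
model = LitMNIST()
model.load_state_dict(torch.load('m.pt'))
model.eval()                            # disable dropout / put batchnorm in eval mode

with torch.no_grad():
    sample = torch.randn(1, 1, 28, 28)  # one MNIST-sized grayscale image
    print(model(sample).argmax(dim=1))  # predicted digit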

Custom Dataset

Trick

Testing the model output

import torch
from torchsummary import summary  # pip install torchsummary

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")  # PyTorch v0.4.0+
model = Net().to(device)  # Net is your model class

# print a layer-by-layer summary; the input size should match what the model expects
summary(model, (3, 256, 256))
# data_input = torch.randn([8, 3, 224, 224]).to(device)
# out = model(data_input)
# print(out.size())

Validation DataLoader

In a 2D convolution, a kernel size of 3 with padding of 1 (and stride 1) leaves the output spatial size unchanged; the same holds for pooling, as the shape check below confirms.
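
A minimal sketch of that check (channel counts chosen only for illustration):

import torch
import torch.nn as nn

x = torch.randn(1, 3, 224, 224)

conv = nn.Conv2d(3, 3, kernel_size=3, padding=1)         # stride defaults to 1
pool = nn.MaxPool2d(kernel_size=3, padding=1, stride=1)

print(conv(x).shape)  # torch.Size([1, 3, 224, 224]) -- spatial size unchanged
print(pool(x).shape)  # torch.Size([1, 3, 224, 224]) -- spatial size unchanged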

from typing import Union, List

import pytorch_lightning as pl
import torch
import torch.nn as nn
import torch.nn.functional as F
from pytorch_lightning.callbacks import ModelCheckpoint
from pytorch_lightning.core.lightning import LightningModule
from pytorch_lightning.loggers import TensorBoardLogger
from torch.utils.data import DataLoader
from torchsummary import summary
from torchvision import transforms, datasets


class Net(LightningModule):
    # basic model building blocks
    def __init__(self):
        super(Net, self).__init__()
        self.seq1 = nn.Sequential(
            nn.Conv2d(in_channels=3, out_channels=64, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.Conv2d(in_channels=64, out_channels=128, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.Conv2d(in_channels=128, out_channels=28, kernel_size=3, padding=1),
            nn.Conv2d(in_channels=28, out_channels=14, kernel_size=3, padding=1),

            nn.MaxPool2d(stride=1, kernel_size=3, padding=1)
        )
        self.dense = nn.Sequential(
            # input dim of the fully connected layer:
            # output channels of the last conv/pool layer * image width * image height
            nn.Linear(14 * 224 * 224, 1024),
            nn.ReLU(),
            nn.Dropout(p=0.5),
            nn.Linear(1024, 2)
        )

    # forward pass
    def forward(self, x):
        x = self.seq1(x)
        # print(x.size())
        # x.size(0) ---> batch size; -1 flattens the remaining dimensions
        x = x.view(x.size(0), -1)
        x = self.dense(x)
        return x

    # training loss
    def training_step(self, batch, batch_idx):
        x, y = batch
        criterion = nn.CrossEntropyLoss()
        loss = criterion(self(x), y)
        self.log('train loss', loss, on_step=True, on_epoch=True, prog_bar=True)
        return loss

    # validation loss
    def validation_step(self, batch, batch_idx):
        x, y = batch
        criterion = nn.CrossEntropyLoss()
        loss = criterion(self(x), y)
        # log val_loss so that ModelCheckpoint(monitor='val_loss') can track it
        self.log('val_loss', loss, on_epoch=True, prog_bar=True)
        return {'val_loss': loss}

    # configure the optimizer
    def configure_optimizers(self):
        return torch.optim.Adam(self.parameters(), lr=0.02)

    # training dataset
    def train_dataloader(self) -> DataLoader:
        data_transform = transforms.Compose([
            transforms.RandomResizedCrop(224),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                 std=[0.229, 0.224, 0.225])
        ])
        train_data = datasets.ImageFolder(root='dogs-vs-cats/train',
                                          transform=data_transform)
        dataset_loader = torch.utils.data.DataLoader(train_data,
                                                     batch_size=1, shuffle=True,
                                                     num_workers=8)
        return dataset_loader

    # validation dataset
    def val_dataloader(self) -> Union[DataLoader, List[DataLoader]]:
        data_transform = transforms.Compose([
            transforms.RandomResizedCrop(224),
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                 std=[0.229, 0.224, 0.225])
        ])
        val_data = datasets.ImageFolder(root='dogs-vs-cats/test',
                                        transform=data_transform)
        dataset_loader = torch.utils.data.DataLoader(val_data,
                                                     batch_size=1, shuffle=False,
                                                     num_workers=8)
        return dataset_loader


if __name__ == '__main__':
    tb_logger = TensorBoardLogger(save_dir='exp')
    checkpoint_callback = ModelCheckpoint(
        filepath='weights.pt',
        verbose=True,
        monitor='val_loss',
        mode='min'
    )
    torch.cuda.empty_cache()
    model = Net()
    # summary(model, (3, 224, 224))
    model.train_dataloader()
    trainer = pl.Trainer(max_epochs=100, logger=tb_logger, auto_lr_find=True,
                         auto_scale_batch_size=True, automatic_optimization=True,
                         checkpoint_callback=checkpoint_callback)
    trainer.fit(model)
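
After training, the checkpoint written by ModelCheckpoint can be restored for evaluation. A minimal sketch, assuming the checkpoint ends up at the weights.pt path configured above (the exact filename depends on the Lightning version and the ModelCheckpoint settings):

# restore the full LightningModule (architecture + weights) from the checkpoint
model = Net.load_from_checkpoint('weights.pt')
model.eval()

with torch.no_grad():
    dummy = torch.randn(1, 3, 224, 224)  # one RGB image at the training resolution
    logits = model(dummy)
    print(logits.softmax(dim=1))         # probabilities for the 2 classes (cat / dog)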

With a 224x224 input and a convolution using kernel_size=7, padding=3, stride=2, the output size works out to (224 - 7 + 2x3)/2 + 1 = 223/2 + 1 = 112.5. Is the .5 simply dropped? The input is 224 and the output is 112, so yes: the result is rounded down. PyTorch's formula is H_out = floor((H_in + 2*padding - kernel_size)/stride) + 1, i.e. the division is floored, which the snippet below verifies.
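
A quick check of the floor behavior, using the layer parameters above:

import torch
import torch.nn as nn

conv = nn.Conv2d(in_channels=3, out_channels=64, kernel_size=7, padding=3, stride=2)
x = torch.randn(1, 3, 224, 224)
print(conv(x).shape)  # torch.Size([1, 64, 112, 112]) -- the .5 is floored away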
