Convolutional Neural Network


📒 Convolution


📝 What is Convolution


  • Slide the filter across the image, moving by the stride value each step
    👉 the operation multiplies each pair of overlapping elements and sums them all to produce one output value
  • In other words, at each position you can think of it as applying mul to the two matrices and then taking a sum (verified in the sketch after this list).
  • stride : how far the filter moves in one step
  • padding : wrap the top, bottom, left, and right of the image with zeros, padding pixels deep
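
A quick sketch of that idea: compute one output element by hand as mul-then-sum and compare it with F.conv2d (the input and filter values here are arbitrary, chosen only for illustration).

import torch
import torch.nn.functional as F

image = torch.arange(16.).view(1, 1, 4, 4)   # 1 batch, 1 channel, 4x4 input
kernel = torch.ones(1, 1, 3, 3)              # 1 output channel, 1 input channel, 3x3 filter

# top-left output element: overlap the filter with image[..., 0:3, 0:3],
# multiply element-wise, then sum everything up
manual = (image[0, 0, 0:3, 0:3] * kernel[0, 0]).sum()

out = F.conv2d(image, kernel, stride=1, padding=0)   # output shape: (1, 1, 2, 2)
print(manual.item(), out[0, 0, 0, 0].item())         # the two values are identical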

📝 In PyTorch


  • PyTorch provides this operation as torch.nn.Conv2d.
  • The output size can be computed with the formula below.
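
Output size = (input size - filter size + 2 × padding) / stride + 1, rounded down, applied per spatial dimension.
For the example below: (227 - 11 + 2 × 0) / 4 + 1 = 55.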
import torch
import torch.nn as nn

conv = nn.Conv2d(1, 1, 11, stride=4, padding=0)
inputs = torch.Tensor(1, 1, 227, 227)
out = conv(inputs)
print(out.shape)
# torch.Size([1, 1, 55, 55])

📝 Pooling


  • Max Pooling
  • Average Pooling (the two are contrasted in the sketch after the code below)
  • Let's build a CNN that does what the figure illustrates: a convolution layer followed by max pooling.
import torch
import torch.nn as nn

input = torch.Tensor(1, 1, 28, 28)   # batch 1, 1 channel, 28x28 image
conv1 = nn.Conv2d(1, 5, 5)           # 1 -> 5 channels, 5x5 kernel
pool = nn.MaxPool2d(2)               # 2x2 max pooling
out = conv1(input)                   # (1, 5, 24, 24)
out2 = pool(out)                     # (1, 5, 12, 12)
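
For reference, max pooling and average pooling on the same toy tensor (the values are arbitrary, chosen only to make the 2x2 windows easy to follow):

import torch
import torch.nn as nn

x = torch.tensor([[[[ 1.,  2.,  3.,  4.],
                    [ 5.,  6.,  7.,  8.],
                    [ 9., 10., 11., 12.],
                    [13., 14., 15., 16.]]]])  # shape (1, 1, 4, 4)

print(nn.MaxPool2d(2)(x))  # each 2x2 window -> its maximum: [[6, 8], [14, 16]]
print(nn.AvgPool2d(2)(x))  # each 2x2 window -> its mean:    [[3.5, 5.5], [11.5, 13.5]]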

📒 MNIST CNN


📝 Problem

  • Classify 28×28 grayscale handwritten-digit images from MNIST into 10 classes (0–9).

📝 Training Steps


  1. Import the libraries (torch, torchvision, matplotlib, etc.)
  2. Configure GPU usage and set a seed for reproducible random values
  3. Set the parameters used for training (learning_rate, training_epochs, batch_size, etc.)
  4. Load the dataset and build a DataLoader
  5. Define the model (class CNN(torch.nn.Module))
  6. Choose a loss function (criterion) and an optimizer
  7. Train the model and check the loss (the criterion's output)
  8. Evaluate the trained model's performance

📝 Code


import torch
import torchvision.datasets as dsets
import torchvision.transforms as transforms
import torch.nn as nn

device = 'cuda' if torch.cuda.is_available() else 'cpu'
torch.manual_seed(777) # fix the random seed
if device == 'cuda':
    torch.cuda.manual_seed_all(777)

# parameters
learning_rate = 0.001
training_epochs = 15
batch_size = 100

# MNIST dataset
mnist_train = dsets.MNIST(
    root='MNIST_data/',
    train=True, 
    transform=transforms.ToTensor(), 
    download=True
    )
mnist_test = dsets.MNIST(
    root='MNIST_data/', 
    train=False, 
    transform = transforms.ToTensor(), 
    download=True
    )
data_loader = torch.utils.data.DataLoader(
    dataset=mnist_train, 
    batch_size=batch_size, 
    shuffle=True, 
    drop_last=True
    )

class CNN(nn.Module):
    def __init__(self):
        super(CNN, self).__init__()
        self.layer1 = nn.Sequential(
            nn.Conv2d(1, 32, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(2)
        )
        self.layer2 = nn.Sequential(
            nn.Conv2d(32, 64, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(2)
        )
        self.fc = nn.Linear(7*7*64, 10, bias=True) # 28x28 -> 14x14 -> 7x7 after two MaxPool2d(2)
        nn.init.xavier_uniform_(self.fc.weight)

    def forward(self, x):
        out = self.layer1(x)
        out = self.layer2(out)
        out = out.view(out.size(0), -1)
        out = self.fc(out)
        return out


model = CNN().to(device)
criterion = nn.CrossEntropyLoss().to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

# training
total_batch = len(data_loader)
for epoch in range(training_epochs):
    avg_cost = 0
    # image, label
    for X, Y in data_loader:
        X = X.to(device)
        Y = Y.to(device)

        optimizer.zero_grad() # gradients must be reset before each backward pass
        hypothesis = model(X)

        cost = criterion(hypothesis, Y)
        cost.backward()
        optimizer.step()

        avg_cost += cost / total_batch
    
    print(f'[Epoch : {epoch + 1}] cost = {avg_cost}')

# eval
with torch.no_grad():
    # mnist_test.data is uint8 in [0, 255]; scale to [0, 1] to match the ToTensor() used in training
    X_test = (mnist_test.data.view(len(mnist_test), 1, 28, 28).float() / 255.).to(device)
    Y_test = mnist_test.targets.to(device)

    prediction = model(X_test)
    correct_prediction = torch.argmax(prediction, 1) == Y_test
    accuracy = correct_prediction.float().mean()
    print(f'Accuracy : {accuracy.item()}')
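
As a quick follow-up to step 8, a minimal sketch (reusing the model and mnist_test defined above) that classifies a single test image; the index r = 0 is an arbitrary choice:

with torch.no_grad():
    r = 0  # index of the test image to classify
    single = (mnist_test.data[r:r + 1].view(1, 1, 28, 28).float() / 255.).to(device)
    pred = model(single)
    print('Label     :', mnist_test.targets[r].item())
    print('Prediction:', torch.argmax(pred, 1).item())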


📒 VGG


import torch.nn as nn
import torch.utils.model_zoo as model_zoo


__all__ = [
    'VGG', 'vgg11', 'vgg11_bn', 'vgg13', 'vgg13_bn', 'vgg16', 'vgg16_bn',
    'vgg19_bn', 'vgg19',
]


model_urls = {
    'vgg11': 'https://download.pytorch.org/models/vgg11-bbd30ac9.pth',
    'vgg13': 'https://download.pytorch.org/models/vgg13-c768596a.pth',
    'vgg16': 'https://download.pytorch.org/models/vgg16-397923af.pth',
    'vgg19': 'https://download.pytorch.org/models/vgg19-dcbb9e9d.pth',
    'vgg11_bn': 'https://download.pytorch.org/models/vgg11_bn-6002323d.pth',
    'vgg13_bn': 'https://download.pytorch.org/models/vgg13_bn-abd245e5.pth',
    'vgg16_bn': 'https://download.pytorch.org/models/vgg16_bn-6c64b313.pth',
    'vgg19_bn': 'https://download.pytorch.org/models/vgg19_bn-c79401a0.pth',
}


class VGG(nn.Module):

    def __init__(self, features, num_classes=1000, init_weights=True):
        super(VGG, self).__init__()
        self.features = features # the stack of convolution layers (built by make_layers below)
        self.avgpool = nn.AdaptiveAvgPool2d((7, 7))
        self.classifier = nn.Sequential(
            nn.Linear(512 * 7 * 7, 4096),
            nn.ReLU(True),
            nn.Dropout(),
            nn.Linear(4096, 4096),
            nn.ReLU(True),
            nn.Dropout(),
            nn.Linear(4096, num_classes),
        ) # FC layer
        if init_weights:
            self._initialize_weights()

    def forward(self, x):
        x = self.features(x) # Convolution
        x = self.avgpool(x)  # avgpool
        x = x.view(x.size(0), -1) # flatten to one row per sample
        x = self.classifier(x) # FC layer
        return x

    def _initialize_weights(self):
        for m in self.modules(): # every module in the model, including the layers passed in as features
            if isinstance(m, nn.Conv2d):
                nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu') # He initialization
                if m.bias is not None:
                    nn.init.constant_(m.bias, 0)
            elif isinstance(m, nn.BatchNorm2d):
                nn.init.constant_(m.weight, 1)
                nn.init.constant_(m.bias, 0)
            elif isinstance(m, nn.Linear):
                nn.init.normal_(m.weight, 0, 0.01)
                nn.init.constant_(m.bias, 0)


def make_layers(cfg, batch_norm=False):
    layers = []
    in_channels = 3 # RGB input, so 3 input channels
    for v in cfg:
        if v == 'M':
            layers += [nn.MaxPool2d(kernel_size=2, stride=2)]
        else:
            conv2d = nn.Conv2d(in_channels, v, kernel_size=3, padding=1)
            if batch_norm:
                layers += [conv2d, nn.BatchNorm2d(v), nn.ReLU(inplace=True)]
            else:
                layers += [conv2d, nn.ReLU(inplace=True)]
            in_channels = v # after this conv layer the channel count becomes v
    return nn.Sequential(*layers)


cfg = {
    'A': [64, 'M', 128, 'M', 256, 256, 'M', 512, 512, 'M', 512, 512, 'M'], # 8 + 3 = vgg11
    'B': [64, 64, 'M', 128, 128, 'M', 256, 256, 'M', 512, 512, 'M', 512, 512, 'M'], # 10 + 3 = vgg13
    'D': [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 'M', 512, 512, 512, 'M', 512, 512, 512, 'M'], # 13 + 3 = vgg16
    'E': [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 256, 'M', 512, 512, 512, 512, 'M', 512, 512, 512, 512, 'M'], # 16 + 3 = vgg19
}


def vgg11(pretrained=False, **kwargs):
    """VGG 11-layer model (configuration "A")

    Args:
        pretrained (bool): If True, returns a model pre-trained on ImageNet
    """
    if pretrained:
        kwargs['init_weights'] = False
    model = VGG(make_layers(cfg['A']), **kwargs)
    if pretrained:
        model.load_state_dict(model_zoo.load_url(model_urls['vgg11']))
    return model


def vgg11_bn(pretrained=False, **kwargs):
    """VGG 11-layer model (configuration "A") with batch normalization

    Args:
        pretrained (bool): If True, returns a model pre-trained on ImageNet
    """
    if pretrained:
        kwargs['init_weights'] = False
    model = VGG(make_layers(cfg['A'], batch_norm=True), **kwargs)
    if pretrained:
        model.load_state_dict(model_zoo.load_url(model_urls['vgg11_bn']))
    return model
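
The cfg entries above can be turned into a working network directly. A minimal usage sketch (num_classes=10 and the batch size are arbitrary choices for illustration): build a VGG16 with batch normalization from cfg['D'], feed it a dummy batch of 224×224 RGB images, and check the output shape.

import torch

# randomly initialized VGG16 with batch norm and 10 output classes
model = VGG(make_layers(cfg['D'], batch_norm=True), num_classes=10, init_weights=True)

dummy = torch.randn(2, 3, 224, 224)   # batch of 2 RGB images
print(model(dummy).shape)             # torch.Size([2, 10])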
