재형이의 성장통 일지
  • AlexNet을 사용한 이미지 분류기 실습
    2024년 03월 18일 21시 00분 35초에 업로드 된 글입니다.
    작성자: 재형이
    반응형
     

     

     

     

     

     

     


     

     

     

     

     

    실습 목표

    문제 정의

    • AlexNet
      • AlexNet competed in the ImageNet Large Scale Visual Recognition Challenge on September 30, 2012. The network achieved a top-5 error of 15.3%, more than 10.8 percentage points lower than that of the runner up.

    주요 코드

    1. nn.Conv2d()

    • in_channels
    • out_channels
    • kernel_size
    • stride=1
    • padding=0
    • sample code
    nn.Conv2d(3, 96, kernel_size=11, stride=4, padding=5),

    2. nn.MaxPool2d()

    • kernel_size
    • stride=None
    • padding=0
    • sample code
    nn.MaxPool2d(kernel_size=3, stride=2, padding=1),

    3. tensor.view()

    • reshape
    >>> x = torch.randn(4, 4)
    >>> x.size()
    torch.Size([4, 4])
    >>> y = x.view(16)
    >>> y.size()
    torch.Size([16])
    >>> z = x.view(-1, 8)  # the size -1 is inferred from other dimensions
    >>> z.size()
    torch.Size([2, 8])
    • AlexNet 코드 미리보기(구조)
    class AlexNet(nn.Module):
        """Preview of the AlexNet architecture used in this notebook.

        Feature extractor: five convolution layers (ReLU) with three
        max-pool stages; classifier head: two fully connected layers.
        The flattened feature size 256 * 2 * 2 corresponds to a 64x64
        RGB input -- TODO confirm the intended input resolution.
        """

        def __init__(self, num_classes=10):
            super(AlexNet, self).__init__()
            # First two conv/pool stages of the backbone.
            stem = [
                nn.Conv2d(3, 96, kernel_size=11, stride=4, padding=5),
                nn.ReLU(inplace=True),
                nn.MaxPool2d(kernel_size=3, stride=2, padding=1),
                nn.Conv2d(96, 256, kernel_size=5, padding=2),
                nn.ReLU(inplace=True),
                nn.MaxPool2d(kernel_size=3, stride=2, padding=1),
            ]
            # Three back-to-back 3x3 conv layers, then a final pool.
            for c_in, c_out in ((256, 384), (384, 384), (384, 256)):
                stem.append(nn.Conv2d(c_in, c_out, kernel_size=3, padding=1))
                stem.append(nn.ReLU(inplace=True))
            stem.append(nn.MaxPool2d(kernel_size=3, stride=2, padding=1))
            self.features = nn.Sequential(*stem)
            # Two-layer fully connected classifier head.
            self.classifier = nn.Sequential(
                nn.Linear(256 * 2 * 2, 4096),
                nn.ReLU(inplace=True),
                nn.Linear(4096, num_classes),
            )

        def forward(self, x):
            """Extract features, flatten per sample, and classify."""
            feats = self.features(x)
            flat = feats.view(feats.size(0), -1)
            return self.classifier(flat)

    4. 이미지 증강

    # Per-channel normalization statistics from the *training* set
    # (raw CIFAR pixels are uint8 in [0, 255], hence the /255).
    mean = train_img.data.mean(axis=(0,1,2)) / 255
    # Fixed: std was computed from test_img, which both mismatches the
    # train-set mean above and leaks test-set statistics; use train_img,
    # consistent with the Step2 code later in this post.
    std = train_img.data.std(axis=(0,1,2)) / 255
    print(f'평균:{mean}, 표준편차:{std}')
    
    # Training pipeline: normalize, then augment with a padded random
    # crop (back to the original size) and a random horizontal flip.
    transform_train = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize(mean, std),
        transforms.RandomCrop(size=train_img.data.shape[1], padding=4),
        transforms.RandomHorizontalFlip(),
    ])
    
    # Test pipeline: normalization only, no augmentation.
    transform_test = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize(mean, std),
    ])
    
    # Re-load CIFAR-10 with the transforms attached.
    train_img2 = datasets.CIFAR10(
        root = 'data',
        train = True,
        download = True,
        transform = transform_train,
    )
    
    test_img2 = datasets.CIFAR10(
        root = 'data',
        train = False,
        download = True,
        transform = transform_test,
    )

    5. Confusion Matrix

    from sklearn.metrics import confusion_matrix

    CIFAR Classifier(AlexNet)

    • CIFAR 데이터셋을 사용하여 이미지에 포함된 object가 무엇인지 분류하는 이미지 분류기를 생성해봅시다

    [Step1] Load libraries & Datasets

    # Core numerics and plotting.
    import numpy as np
    import matplotlib.pyplot as plt
    
    # PyTorch: tensors, batching, and network building blocks.
    import torch
    from torch.utils.data import DataLoader
    from torch import nn
    
    # torchvision: CIFAR-10 dataset, transforms, tensor->PIL conversion.
    from torchvision import datasets
    from torchvision.transforms import transforms
    from torchvision.transforms.functional import to_pil_image
    
    # Load the CIFAR-10 dataset (downloads into ./data on first run).
    train_img = datasets.CIFAR10(
        root = 'data',
        train = True,
        download = True,
        transform = transforms.ToTensor(),
    )
    
    test_img = datasets.CIFAR10(
        root = 'data',
        train = False,
        download = True,
        transform = transforms.ToTensor(),
    )

    [Step2] Data preprocessing

    • 불러온 이미지의 증강을 통해 학습 정확도를 향상시키도록 합니다.
      • RandomCrop
      • RandomHorizontalFlip
      • Normalize
    # Per-channel mean/std of the raw training images (uint8, so /255).
    mean = train_img.data.mean(axis=(0,1,2)) / 255
    std = train_img.data.std(axis=(0,1,2)) / 255
    print(f'평균:{mean}, 표준편차:{std}')
    
    # Training pipeline: normalize, then augment with a padded random
    # crop (back to the original size) and a random horizontal flip.
    # NOTE(review): RandomCrop runs *after* Normalize here, so the 4px
    # padding is zeros in normalized space (i.e. mean-colored pixels);
    # the more common order is crop/flip first, then ToTensor/Normalize
    # -- confirm this ordering is intentional.
    transform_train = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize(mean, std),
        transforms.RandomCrop(size=train_img.data.shape[1], padding=4),
        transforms.RandomHorizontalFlip(),
    ])
    
    # Test pipeline: normalization only, no augmentation.
    transform_test = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize(mean, std),
    ])
    
    # Re-load CIFAR-10 with the transforms attached.
    train_img2 = datasets.CIFAR10(
        root = 'data',
        train = True,
        download = True,
        transform = transform_train,
    )
    
    test_img2 = datasets.CIFAR10(
        root = 'data',
        train = False,
        download = True,
        transform = transform_test,
    )

    [Step3] Set hyperparameters

    # Training hyperparameters.
    EPOCH = 10  # number of full passes over the training set
    BATCH_SIZE = 128  # samples per mini-batch
    LEARNING_RATE = 1e-3  # Adam step size
    # Prefer GPU when one is available.
    DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print("Using Device:", DEVICE)

    [Step4] Create DataLoader

    # Build DataLoaders over the augmented datasets; shuffle training only.
    train_loader = DataLoader(train_img2, batch_size = BATCH_SIZE, shuffle = True)
    test_loader = DataLoader(test_img2, batch_size = BATCH_SIZE, shuffle = False)

    EDA

    # Quick EDA: dataset summaries, one raw sample, and a batch-shape check.
    print(train_img, '\n------------------\n', test_img)
    
    # First sample: an (image tensor, integer label) pair.
    train_img[0]
    
    # Pull one batch from the loader to confirm tensor shapes.
    train_features, train_labels = next(iter(train_loader))
    print(f"Feature batch shape: {train_features.size()}")
    print(f"Labels batch shape: {train_labels.size()}")
    
    # CIFAR-10 class index -> human-readable name.
    labels_map = {
        0: "plane",
        1: "car",
        2: "bird",
        3: "cat",
        4: "deer",
        5: "dog",
        6: "frog",
        7: "horse",
        8: "ship",
        9: "truck",
    }
    
    # Show a 5x5 grid of randomly sampled training images with labels.
    figure = plt.figure(figsize = (8, 8))
    cols, rows = 5, 5
    
    for i in range(1, cols * rows +1):
        sample_idx = torch.randint(len(train_img), size=(1,)).item()
        img, label = train_img[sample_idx]
        figure.add_subplot(rows, cols, i)
        plt.title(labels_map[label])
        plt.axis('off')
        plt.imshow(to_pil_image(img))
    plt.show()

    [Step5] Set Network Structure

    class AlexNet(nn.Module):
        """AlexNet variant sized for CIFAR-10.

        Unlike the preview version, conv1 has no padding, so a 32x32
        input reduces to a 256 * 1 * 1 feature map -- matching the
        Linear(256, 4096) classifier input.
        """

        def __init__(self, num_classes=10):
            super(AlexNet, self).__init__()
            # First two conv/pool stages of the backbone.
            backbone = [
                nn.Conv2d(3, 96, kernel_size=11, stride=4),
                nn.ReLU(inplace=True),
                nn.MaxPool2d(kernel_size=3, stride=2, padding=1),
                nn.Conv2d(96, 256, kernel_size=5, padding=2),
                nn.ReLU(inplace=True),
                nn.MaxPool2d(kernel_size=3, stride=2, padding=1),
            ]
            # Three back-to-back 3x3 conv layers, then a final pool.
            for c_in, c_out in ((256, 384), (384, 384), (384, 256)):
                backbone.append(nn.Conv2d(c_in, c_out, kernel_size=3, padding=1))
                backbone.append(nn.ReLU(inplace=True))
            backbone.append(nn.MaxPool2d(kernel_size=3, stride=2, padding=1))
            self.features = nn.Sequential(*backbone)
            # Classifier head with dropout regularization.
            self.classifier = nn.Sequential(
                nn.Linear(256, 4096),
                nn.Dropout(0.5),
                nn.ReLU(inplace=True),
                nn.Linear(4096, num_classes),
            )

        def forward(self, x):
            """Extract features, flatten per sample, and classify."""
            feats = self.features(x)
            flat = feats.view(feats.size(0), -1)
            return self.classifier(flat)

    [Step6] Create Model instance

    # Create the model instance, move it to the selected device, and
    # print its layer structure.
    model = AlexNet().to(DEVICE)
    print(model)

    [Step7] Model compile

    # Loss function for multi-class classification.
    # NOTE(review): the name `loss` is also reused inside train() for the
    # per-batch loss value; `loss_fn` would be clearer here.
    loss = nn.CrossEntropyLoss()
    # Adam optimizer over all model parameters.
    optimizer = torch.optim.Adam(model.parameters(), lr = LEARNING_RATE)

    [Step8] Set train loop

    def train(train_loader, model, loss_fn, optimizer):
        """Run one training epoch over `train_loader`.

        Moves each batch to DEVICE, computes the loss, backpropagates,
        and steps the optimizer. Prints progress every 100 batches.
        """
        model.train()
        size = len(train_loader.dataset)

        for step, (inputs, targets) in enumerate(train_loader):
            inputs = inputs.to(DEVICE)
            targets = targets.to(DEVICE)

            # Forward pass + loss, then the usual backward/step cycle.
            optimizer.zero_grad()
            loss = loss_fn(model(inputs), targets)
            loss.backward()
            optimizer.step()

            # Periodic progress report (every 100 mini-batches).
            if step % 100 == 0:
                current = step * len(inputs)
                loss = loss.item()
                print(f'loss: {loss:>7f}  [{current:>5d}]/{size:5d}')

    [Step9] Set test loop

    def test(test_loader, model, loss_fn):
        """Evaluate the model on `test_loader`.

        Accumulates average loss and accuracy over the whole set and
        prints them; gradients are disabled during evaluation.
        """
        model.eval()

        size = len(test_loader.dataset)
        num_batches = len(test_loader)
        test_loss = 0
        correct = 0

        with torch.no_grad():
            for inputs, targets in test_loader:
                inputs = inputs.to(DEVICE)
                targets = targets.to(DEVICE)
                logits = model(inputs)
                test_loss += loss_fn(logits, targets).item()
                # Count batch hits: predicted class == ground truth.
                hits = (logits.argmax(1) == targets).type(torch.float)
                correct += hits.sum().item()

        # Average loss per batch; accuracy as a fraction of the dataset.
        test_loss /= num_batches
        correct /= size
        print(f"Test Error: \n Accuracy: {(100*correct):>0.1f}%, Avg loss: {test_loss:8f}\n")

    [Step10] Run model

    # Main loop: one training pass plus one evaluation pass per epoch.
    for i in range(EPOCH) :
        print(f"Epoch {i+1} \n------------------------")
        train(train_loader, model, loss, optimizer)
        test(test_loader, model, loss)
    print("Done!")
    • Epoch 1 
      ------------------------
      loss: 2.300951  [    0]/50000
      loss: 1.947751  [12800]/50000
      loss: 1.846590  [25600]/50000
      loss: 1.796698  [38400]/50000
      Test Error: 
       Accuracy: 35.6%, Avg loss: 1.672076

      Epoch 2 
      ------------------------
      loss: 1.677636  [    0]/50000
      loss: 1.796524  [12800]/50000
      loss: 1.735486  [25600]/50000
      loss: 1.617321  [38400]/50000
      Test Error: 
       Accuracy: 41.4%, Avg loss: 1.527878

      Epoch 3 
      ------------------------
      loss: 1.436651  [    0]/50000
      loss: 1.666115  [12800]/50000
      loss: 1.519173  [25600]/50000
      loss: 1.700590  [38400]/50000
      Test Error: 
       Accuracy: 47.1%, Avg loss: 1.442199

      Epoch 4 
      ------------------------
      loss: 1.529453  [    0]/50000
      loss: 1.358973  [12800]/50000
      loss: 1.369336  [25600]/50000
      loss: 1.341564  [38400]/50000
      Test Error: 
       Accuracy: 49.6%, Avg loss: 1.398217

      Epoch 5 
      ------------------------
      loss: 1.368124  [    0]/50000
      loss: 1.449574  [12800]/50000
      loss: 1.343148  [25600]/50000
      loss: 1.185346  [38400]/50000
      Test Error: 
       Accuracy: 50.7%, Avg loss: 1.342151

      Epoch 6 
      ------------------------
      loss: 1.502402  [    0]/50000
      loss: 1.323506  [12800]/50000
      loss: 1.461938  [25600]/50000
      loss: 1.256716  [38400]/50000
      Test Error: 
       Accuracy: 51.5%, Avg loss: 1.315709

      Epoch 7 
      ------------------------
      loss: 1.341234  [    0]/50000
      loss: 1.392743  [12800]/50000
      loss: 1.231053  [25600]/50000
      loss: 1.405280  [38400]/50000
      Test Error: 
       Accuracy: 52.5%, Avg loss: 1.312394

      Epoch 8 
      ------------------------
      loss: 1.423609  [    0]/50000
      loss: 1.230737  [12800]/50000
      loss: 1.235436  [25600]/50000
      loss: 1.367993  [38400]/50000
      Test Error: 
       Accuracy: 54.9%, Avg loss: 1.246386

      Epoch 9 
      ------------------------
      loss: 1.268459  [    0]/50000
      loss: 1.293651  [12800]/50000
      loss: 1.296969  [25600]/50000
      loss: 1.324154  [38400]/50000
      Test Error: 
       Accuracy: 54.5%, Avg loss: 1.255810

      Epoch 10 
      ------------------------
      loss: 1.348040  [    0]/50000
      loss: 1.403978  [12800]/50000
      loss: 1.264835  [25600]/50000
      loss: 1.319865  [38400]/50000
      Test Error: 
       Accuracy: 56.2%, Avg loss: 1.224928

      Done!

    [Step11] Confusion Matrix

    import itertools
    def plot_confusion_matrix(cm, target_names=None, cmap=None,
                              normalize=True, labels=True, title='Confusion matrix'):
        """Render a confusion matrix as a matplotlib heatmap.

        Parameters
        ----------
        cm : ndarray of shape (n_classes, n_classes)
            Confusion matrix, e.g. from sklearn.metrics.confusion_matrix.
        target_names : sequence of str, optional
            Class names used for the axis tick labels.
        cmap : matplotlib colormap, optional
            Defaults to 'Blues'.
        normalize : bool
            If True, normalize each row to sum to 1 (per-class recall).
        labels : bool
            If True, write each cell's value inside the heatmap.
        title : str
            Figure title.
        """
        # Overall accuracy: correct predictions (trace) over the total.
        accuracy = np.trace(cm) / float(np.sum(cm))
        misclass = 1 - accuracy

        if cmap is None:
            cmap = plt.get_cmap('Blues')

        if normalize:
            # Row-normalize; assumes every class appears at least once,
            # otherwise a zero row would divide by zero.
            cm = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis]

        plt.figure(figsize=(8, 6))
        plt.imshow(cm, interpolation='nearest', cmap=cmap)
        plt.title(title)
        plt.colorbar()

        # Threshold that flips cell-text color for readability.
        thresh = cm.max() / 1.5 if normalize else cm.max() / 2

        if target_names is not None:
            tick_marks = np.arange(len(target_names))
            plt.xticks(tick_marks, target_names)
            plt.yticks(tick_marks, target_names)

        if labels:
            for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):
                if normalize:
                    plt.text(j, i, "{:0.4f}".format(cm[i, j]),
                             horizontalalignment="center",
                             color="white" if cm[i, j] > thresh else "black")
                else:
                    plt.text(j, i, "{:,}".format(cm[i, j]),
                             horizontalalignment="center",
                             color="white" if cm[i, j] > thresh else "black")

        plt.tight_layout()
        plt.ylabel('True label')
        # Fixed: the original wrapped this literal with a backslash
        # continuation *inside* the string, embedding a long run of
        # spaces into the rendered axis label.
        plt.xlabel('Predicted label\naccuracy={:0.4f}; '
                   'misclass={:0.4f}'.format(accuracy, misclass))
        plt.show()
        # Removed a stray function-local `from sklearn.metrics import
        # confusion_matrix` that sat here after plt.show(): it bound an
        # unused local name and could not provide the module-level
        # `confusion_matrix` the code after this function relies on.
    
    # Collect ground-truth and predicted labels over the full test set
    # to build the confusion matrix.
    model.eval()
    ylabel = []
    ypred_label = []

    # no_grad: inference only -- skip autograd bookkeeping, matching the
    # evaluation loop used in test().
    with torch.no_grad():
        for batch_idx, (inputs, targets) in enumerate(test_loader):
            inputs, targets = inputs.to(DEVICE), targets.to(DEVICE)
            outputs = model(inputs)
            _, predicted = outputs.max(1)  # class index with highest logit
            ylabel = np.concatenate((ylabel, targets.cpu().numpy()))
            ypred_label = np.concatenate((ypred_label, predicted.cpu().numpy()))

    cnf_matrix = confusion_matrix(ylabel, ypred_label)
    
    plot_confusion_matrix(cnf_matrix, 
                          target_names=labels_map.values(),

     

     

     

     

     

     

     


     

     

     

     

     

     

     

     

    반응형
    댓글