방명록
- AlexNet을 사용한 이미지 분류기 실습 (2024년 03월 18일 21시 00분 35초 업로드, 작성자: 재형이)
실습 목표
- CNN을 구성하는 주요 연산을 이해하고, 신경망을 설계한다.
- AlexNet을 사용하여 이미지를 학습하고 10개의 카테고리를 갖는 이미지를 분류하는 이미지 분류기를 생성한다. (데이터셋: CIFAR)
- 이미지 증강을 수행하여 모델의 성능을 높이는 방법을 이해한다.
- Confusion Matrix를 생성하여 분류 정확도를 확인한다.
- reference
https://tutorials.pytorch.kr/beginner/basics/buildmodel_tutorial.html
https://tutorials.pytorch.kr/beginner/basics/data_tutorial.html
https://medium.com/@djin31/how-to-plot-wholesome-confusion-matrix-40134fd402a8
문제 정의
- AlexNet
- AlexNet competed in the ImageNet Large Scale Visual Recognition Challenge on September 30, 2012. The network achieved a top-5 error of 15.3%, more than 10.8 percentage points lower than that of the runner up.
주요 코드
1. nn.Conv2d()
- in_channels
- out_channels
- kernel_size
- stride=1
- padding=0
- sample code
nn.Conv2d(3, 96, kernel_size=11, stride=4, padding=5),
2. nn.MaxPool2d()
- kernel_size
- stride=None
- padding=0
- sample code
nn.MaxPool2d(kernel_size=3, stride=2, padding=1),
3. tensor.view()
- reshape
>>> x = torch.randn(4, 4)
>>> x.size()
torch.Size([4, 4])
>>> y = x.view(16)
>>> y.size()
torch.Size([16])
>>> z = x.view(-1, 8)  # the size -1 is inferred from other dimensions
>>> z.size()
torch.Size([2, 8])
- AlexNet 코드 미리보기(구조)
class AlexNet(nn.Module):
    """AlexNet-style CNN adapted to small (32x32) CIFAR images.

    Args:
        num_classes: number of output categories (default 10 for CIFAR-10).
    """

    def __init__(self, num_classes=10):
        super(AlexNet, self).__init__()
        # Feature extractor. For a 3x32x32 input the spatial size evolves:
        # 32 -(conv1 s4 p5)-> 8 -(pool)-> 4 -(conv2)-> 4 -(pool)-> 2
        #   -(conv3..5)-> 2 -(pool)-> 1, leaving a 256x1x1 map.
        self.features = nn.Sequential(
            nn.Conv2d(3, 96, kernel_size=11, stride=4, padding=5),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2, padding=1),
            nn.Conv2d(96, 256, kernel_size=5, padding=2),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2, padding=1),
            nn.Conv2d(256, 384, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(384, 384, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(384, 256, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2, padding=1),
        )
        # BUGFIX: the original declared nn.Linear(256 * 2 * 2, 4096), which
        # only matches a 64x64 input; a 32x32 CIFAR image reaches this point
        # as 256x1x1, so the original forward pass would fail with a shape
        # mismatch. The classifier input is therefore 256.
        self.classifier = nn.Sequential(
            nn.Linear(256 * 1 * 1, 4096),
            nn.ReLU(inplace=True),
            nn.Linear(4096, num_classes),
        )

    def forward(self, x):
        """Return raw class scores (logits) for a batch of images."""
        x = self.features(x)
        x = x.view(x.size(0), -1)  # flatten per-sample feature maps
        x = self.classifier(x)
        return x
4. 이미지 증강
# Normalization statistics must come from the TRAINING split only.
# BUGFIX: the original computed `std` from test_img (train/test leakage,
# and inconsistent with the Step2 code later in this post, which correctly
# uses train_img for both statistics).
mean = train_img.data.mean(axis=(0, 1, 2)) / 255
std = train_img.data.std(axis=(0, 1, 2)) / 255
print(f'평균:{mean}, 표준편차:{std}')

# Training pipeline: tensor conversion, normalization, then augmentation
# (padded random crop back to the original size, random horizontal flip).
transform_train = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean, std),
    transforms.RandomCrop(size=train_img.data.shape[1], padding=4),
    transforms.RandomHorizontalFlip(),
])
# Evaluation pipeline: same normalization, no augmentation.
transform_test = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean, std),
])

# Re-create the CIFAR-10 datasets with the transforms attached.
train_img2 = datasets.CIFAR10(
    root='data',
    train=True,
    download=True,
    transform=transform_train,
)
test_img2 = datasets.CIFAR10(
    root='data',
    train=False,
    download=True,
    transform=transform_test,
)
5. Confusion Matrix
from sklearn.metrics import confusion_matrix
CIFAR Classifier(AlexNet)
- CIFAR 데이터셋을 사용하여 이미지에 포함된 object가 무엇인지 분류하는 이미지 분류기를 생성해봅시다
[Step1] Load libraries & Datasets
# [Step1] Load libraries and the raw CIFAR-10 dataset (plain tensors only;
# Step2 rebuilds the datasets with normalization/augmentation transforms).
import numpy as np
import matplotlib.pyplot as plt
import torch
from torch.utils.data import DataLoader
from torch import nn
# NOTE(review): 'from torchvision.transforms import transforms' works but the
# conventional form is 'from torchvision import transforms'.
from torchvision import datasets
from torchvision.transforms import transforms
from torchvision.transforms.functional import to_pil_image

# Load the CIFAR-10 data (downloads into ./data on first run).
train_img = datasets.CIFAR10(
    root = 'data',
    train = True,
    download = True,
    transform = transforms.ToTensor(),
)
test_img = datasets.CIFAR10(
    root = 'data',
    train = False,
    download = True,
    transform = transforms.ToTensor(),
)
[Step2] Data preprocessing
- 불러온 이미지의 증강을 통해 학습 정확도를 향상시키도록 합니다.
- RandomCrop
- RandomHorizontalFlip
- Normalize
# [Step2] Build an augmented training dataset and a plain normalized test set.
# Per-channel statistics of the raw uint8 training images, scaled to [0, 1].
mean = train_img.data.mean(axis=(0,1,2)) / 255
std = train_img.data.std(axis=(0,1,2)) / 255
print(f'평균:{mean}, 표준편차:{std}')

# Training pipeline: ToTensor -> Normalize -> RandomCrop(pad=4) -> flip.
# NOTE(review): augmentation usually precedes Normalize; with this ordering
# the crop's padded border pixels are exact zeros in normalized space —
# confirm this ordering is intentional.
transform_train = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean, std),
    transforms.RandomCrop(size=train_img.data.shape[1], padding=4),
    transforms.RandomHorizontalFlip(),
])
# Evaluation pipeline: normalization only, no augmentation.
transform_test = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean, std),
])

# Re-create the CIFAR-10 datasets with the transforms attached.
train_img2 = datasets.CIFAR10(
    root = 'data',
    train = True,
    download = True,
    transform = transform_train,
)
test_img2 = datasets.CIFAR10(
    root = 'data',
    train = False,
    download = True,
    transform = transform_test,
)
[Step3] Set hyperparameters
# [Step3] Training hyperparameters.
EPOCH = 10            # full passes over the training set
BATCH_SIZE = 128      # samples per gradient step
LEARNING_RATE = 1e-3  # Adam step size
# Train on GPU when available, otherwise fall back to CPU.
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Using Device:", DEVICE)
[Step4] Create DataLoader
# DataLoader 만들기 train_loader = DataLoader(train_img2, batch_size = BATCH_SIZE, shuffle = True) test_loader = DataLoader(test_img2, batch_size = BATCH_SIZE, shuffle = False)
EDA
# Exploratory look at the raw datasets and one training batch.
print(train_img, '\n------------------\n', test_img)
train_img[0]  # (image tensor, label) tuple for the first sample

# Peek at one batch from the (augmented) training loader to confirm shapes.
train_features, train_labels = next(iter(train_loader))
print(f"Feature batch shape: {train_features.size()}")
print(f"Labels batch shape: {train_labels.size()}")

# CIFAR-10 class index -> human-readable name.
labels_map = {
    0: "plane",
    1: "car",
    2: "bird",
    3: "cat",
    4: "deer",
    5: "dog",
    6: "frog",
    7: "horse",
    8: "ship",
    9: "truck",
}

# Show a 5x5 grid of randomly chosen (un-augmented) training images.
figure = plt.figure(figsize = (8, 8))
cols, rows = 5, 5
for i in range(1, cols * rows +1):
    sample_idx = torch.randint(len(train_img), size=(1,)).item()
    img, label = train_img[sample_idx]
    figure.add_subplot(rows, cols, i)
    plt.title(labels_map[label])
    plt.axis('off')
    plt.imshow(to_pil_image(img))
plt.show()
[Step5] Set Network Structure
class AlexNet(nn.Module):
    """A compact AlexNet variant for 10-way CIFAR classification.

    Five convolution stages with three overlapping max-pools reduce a
    3x32x32 image to a 256x1x1 feature map, which a two-layer MLP
    (with dropout) maps to class scores.
    """

    def __init__(self, num_classes=10):
        super(AlexNet, self).__init__()
        # Convolutional backbone. Channel progression follows classic
        # AlexNet: 96 -> 256 -> 384 -> 384 -> 256.
        self.features = nn.Sequential(
            # Stage 1: aggressive stride-4 conv, then overlapping pool.
            nn.Conv2d(3, 96, kernel_size=11, stride=4),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2, padding=1),
            # Stage 2.
            nn.Conv2d(96, 256, kernel_size=5, padding=2),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2, padding=1),
            # Stages 3-5: three 3x3 convs, then a final pool.
            nn.Conv2d(256, 384, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(384, 384, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(384, 256, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2, padding=1),
        )
        # Classifier head: 256 flattened features -> 4096 -> num_classes.
        self.classifier = nn.Sequential(
            nn.Linear(256, 4096),
            nn.Dropout(0.5),
            nn.ReLU(inplace=True),
            nn.Linear(4096, num_classes),
        )

    def forward(self, x):
        """Return raw (un-softmaxed) class scores for a batch of images."""
        feature_maps = self.features(x)
        flattened = feature_maps.view(feature_maps.size(0), -1)
        return self.classifier(flattened)
[Step6] Create Model instance
# Model instance 생성 model = AlexNet().to(DEVICE) print(model)
[Step7] Model compile
loss = nn.CrossEntropyLoss() # Optimizer optimizer = torch.optim.Adam(model.parameters(), lr = LEARNING_RATE)
[Step8] Set train loop
def train(train_loader, model, loss_fn, optimizer):
    """Run one training epoch over `train_loader`.

    Moves each batch to DEVICE, performs forward / backward / optimizer
    step, and prints the batch loss every 100 batches.
    """
    model.train()
    size = len(train_loader.dataset)
    for batch, (X, y) in enumerate(train_loader):
        X, y = X.to(DEVICE), y.to(DEVICE)

        # Forward pass and loss for this batch.
        pred = model(X)
        batch_loss = loss_fn(pred, y)

        # Backpropagation.
        optimizer.zero_grad()
        batch_loss.backward()
        optimizer.step()

        # Periodic progress report.
        if batch % 100 == 0:
            loss, current = batch_loss.item(), batch * len(X)
            print(f'loss: {loss:>7f} [{current:>5d}]/{size:5d}')
[Step9] Set test loop
def test(test_loader, model, loss_fn):
    """Evaluate `model` on `test_loader`; print accuracy and avg batch loss."""
    model.eval()
    n_samples = len(test_loader.dataset)
    n_batches = len(test_loader)
    total_loss = 0
    n_correct = 0
    with torch.no_grad():  # inference only — skip autograd bookkeeping
        for X, y in test_loader:
            X, y = X.to(DEVICE), y.to(DEVICE)
            pred = model(X)
            total_loss += loss_fn(pred, y).item()
            n_correct += (pred.argmax(1) == y).type(torch.float).sum().item()
    test_loss = total_loss / n_batches
    correct = n_correct / n_samples
    print(f"Test Error: \n Accuracy: {(100*correct):>0.1f}%, Avg loss: {test_loss:8f}\n")
[Step10] Run model
# [Step10] Train for EPOCH epochs, evaluating on the test set after each.
for i in range(EPOCH) :
    print(f"Epoch {i+1} \n------------------------")
    train(train_loader, model, loss, optimizer)
    test(test_loader, model, loss)
print("Done!")
- Epoch 1
------------------------
loss: 2.300951 [ 0]/50000
loss: 1.947751 [12800]/50000
loss: 1.846590 [25600]/50000
loss: 1.796698 [38400]/50000
Test Error:
Accuracy: 35.6%, Avg loss: 1.672076
Epoch 2
------------------------
loss: 1.677636 [ 0]/50000
loss: 1.796524 [12800]/50000
loss: 1.735486 [25600]/50000
loss: 1.617321 [38400]/50000
Test Error:
Accuracy: 41.4%, Avg loss: 1.527878
Epoch 3
------------------------
loss: 1.436651 [ 0]/50000
loss: 1.666115 [12800]/50000
loss: 1.519173 [25600]/50000
loss: 1.700590 [38400]/50000
Test Error:
Accuracy: 47.1%, Avg loss: 1.442199
Epoch 4
------------------------
loss: 1.529453 [ 0]/50000
loss: 1.358973 [12800]/50000
loss: 1.369336 [25600]/50000
loss: 1.341564 [38400]/50000
Test Error:
Accuracy: 49.6%, Avg loss: 1.398217
Epoch 5
------------------------
loss: 1.368124 [ 0]/50000
loss: 1.449574 [12800]/50000
loss: 1.343148 [25600]/50000
loss: 1.185346 [38400]/50000
Test Error:
Accuracy: 50.7%, Avg loss: 1.342151
Epoch 6
------------------------
loss: 1.502402 [ 0]/50000
loss: 1.323506 [12800]/50000
loss: 1.461938 [25600]/50000
loss: 1.256716 [38400]/50000
Test Error:
Accuracy: 51.5%, Avg loss: 1.315709
Epoch 7
------------------------
loss: 1.341234 [ 0]/50000
loss: 1.392743 [12800]/50000
loss: 1.231053 [25600]/50000
loss: 1.405280 [38400]/50000
Test Error:
Accuracy: 52.5%, Avg loss: 1.312394
Epoch 8
------------------------
loss: 1.423609 [ 0]/50000
loss: 1.230737 [12800]/50000
loss: 1.235436 [25600]/50000
loss: 1.367993 [38400]/50000
Test Error:
Accuracy: 54.9%, Avg loss: 1.246386
Epoch 9
------------------------
loss: 1.268459 [ 0]/50000
loss: 1.293651 [12800]/50000
loss: 1.296969 [25600]/50000
loss: 1.324154 [38400]/50000
Test Error:
Accuracy: 54.5%, Avg loss: 1.255810
Epoch 10
------------------------
loss: 1.348040 [ 0]/50000
loss: 1.403978 [12800]/50000
loss: 1.264835 [25600]/50000
loss: 1.319865 [38400]/50000
Test Error:
Accuracy: 56.2%, Avg loss: 1.224928
Done!
[Step11] Confusion Matrix
# [Step11] Confusion matrix over the test set.
import itertools


def plot_confusion_matrix(cm, target_names=None, cmap=None, normalize=True,
                          labels=True, title='Confusion matrix'):
    """Render a confusion matrix with matplotlib.

    Args:
        cm: square ndarray of counts; rows = true labels, cols = predictions.
        target_names: optional iterable of class names for the tick labels.
        cmap: matplotlib colormap (defaults to 'Blues').
        normalize: if True, display row-normalized rates instead of counts.
        labels: if True, write each cell's value into the plot.
        title: figure title.
    """
    # Overall accuracy from the raw counts (computed before normalization).
    accuracy = np.trace(cm) / float(np.sum(cm))
    misclass = 1 - accuracy

    if cmap is None:
        cmap = plt.get_cmap('Blues')

    if normalize:
        # Row-normalize so each row sums to 1 (per-class recall view).
        cm = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis]

    plt.figure(figsize=(8, 6))
    plt.imshow(cm, interpolation='nearest', cmap=cmap)
    plt.title(title)
    plt.colorbar()

    # Threshold that decides white vs black cell text for readability.
    thresh = cm.max() / 1.5 if normalize else cm.max() / 2

    if target_names is not None:
        # BUGFIX: materialize once so any iterable (e.g. dict.values() or a
        # generator) works for both len() and the tick calls.
        target_names = list(target_names)
        tick_marks = np.arange(len(target_names))
        plt.xticks(tick_marks, target_names)
        plt.yticks(tick_marks, target_names)

    if labels:
        for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):
            if normalize:
                plt.text(j, i, "{:0.4f}".format(cm[i, j]),
                         horizontalalignment="center",
                         color="white" if cm[i, j] > thresh else "black")
            else:
                plt.text(j, i, "{:,}".format(cm[i, j]),
                         horizontalalignment="center",
                         color="white" if cm[i, j] > thresh else "black")

    plt.tight_layout()
    plt.ylabel('True label')
    # BUGFIX: the original string contained a mangled line-continuation
    # ("...;\ misclass...") from the paste; repaired to a plain format string.
    plt.xlabel('Predicted label\naccuracy={:0.4f}; misclass={:0.4f}'.format(
        accuracy, misclass))
    plt.show()


from sklearn.metrics import confusion_matrix

# Collect true and predicted labels for the whole test set.
model.eval()
ylabel = []
ypred_label = []
with torch.no_grad():  # ROBUSTNESS: no gradients needed for inference
    for batch_idx, (inputs, targets) in enumerate(test_loader):
        inputs, targets = inputs.to(DEVICE), targets.to(DEVICE)
        outputs = model(inputs)
        _, predicted = outputs.max(1)
        ylabel = np.concatenate((ylabel, targets.cpu().numpy()))
        ypred_label = np.concatenate((ypred_label, predicted.cpu().numpy()))

cnf_matrix = confusion_matrix(ylabel, ypred_label)
# NOTE(review): the source was truncated mid-call here; completed with the
# class names from labels_map — confirm against the original post.
plot_confusion_matrix(cnf_matrix, target_names=list(labels_map.values()))
반응형'인공지능 > 프레임워크 or 라이브러리' 카테고리의 다른 글
Variational Autoencoder (0) 2024.03.20 VGGNet을 사용한 이미지 분류기 실습 (2) 2024.03.19 인공 신경망 코드로 구현해서 다중 분류해보기 (2) (4) 2024.03.17 인공 신경망 코드로 구현해서 다중 분류해보기 (1) (0) 2024.03.16 자전거 대여량 예측 - 선형 회귀, 군집 모델 (클러스터링) 실습 (4) 2024.03.15 다음글이 없습니다.이전글이 없습니다.댓글