์ฌ์ง ์ ์ฒ๋ฆฌ ์ดํ, ๋ชจ๋ธ์ ์ ์ ํ๊ณ ํ์ต์ํค๋ ๊ณผ์ ์ ๊ฑฐ์ณค๋ค.
๋ชจ๋ธ์ ํ๋ณด๋ ResNet, EfficientNet, Arcface๊ฐ ์์๋๋ฐ, ๊ฐ์ฅ ์ผ๊ตด ์ ์ฌ๋ ๋ถ๋ถ์์ ํฐ ์ฑ๋ฅ์ ๋ณด์ด๋ Arcface ๋ชจ๋ธ์ ์ ์ ํ๋ค.
๋ ์์ธํ ๊ณผ์ ์ ์ฝ๋ฉ์ ํตํด ํ์ธํ ์ ์๋ค.
1. CSV ํ์ผ ์์ฑ
์ฐ์ ์ ์ฒ๋ฆฌ๋ ์ฌ์ง๋ค์ ์ด๋ฆ(์ด๋ฆ_์๋ฒ ํํ)๊ณผ ํด๋น ์ธ๋ฌผ๋ค์ด label ๋ csv ํ์ผ์ ์์ฑํ๋ค.
2. Train, Test dataset ๋ถ๋ฆฌ
ํญ๋ชฉ๋ณ๋ก 70%๋ train, 30%๋ test์ ๋ฐ์ดํฐ์ ์ผ๋ก ๋ถ๋ฆฌํ๋ค.
# ์ด๋์
์ ํ๋์ ๋ฐฐ์ด๋ก ๋ชจ์ผ๊ธฐ
# Collect the distinct class labels (person initials) in order of first appearance.
# drop_duplicates() replaces the original O(n^2) "if x not in list" loop while
# preserving first-seen order exactly.
tmp = df.copy()
class_name_list = tmp['class'].drop_duplicates().tolist()
print(class_name_list)  # ['SMO', 'CHJ', 'CDE', 'HAR', 'JJJ', 'JSI', 'OJY', 'SHE', 'SHG', 'IDH', 'IJH']
# Per-class (stratified) split: 70% of each person's images go to train,
# 30% to validation, so every identity is represented in both sets.
train_parts = []
valid_parts = []
for class_name in class_name_list:
    rows = tmp.loc[tmp['class'] == class_name]
    train_tmp, valid_tmp = train_test_split(rows, test_size=0.3, random_state=42)
    # The rows already carry the correct 'class' value, so no reassignment is
    # needed (the original no-op assignment triggered SettingWithCopyWarning).
    train_parts.append(train_tmp)
    valid_parts.append(valid_tmp)
# Concatenate once at the end: growing a DataFrame inside the loop is O(n^2),
# and seeding with an empty object-dtype frame could upcast column dtypes.
train = pd.concat(train_parts)
valid = pd.concat(valid_parts)
์ด๋ ๊ฒ train ์ด๋ผ๋ ๋ณ์์๋ ๊ฐ ์ด๋์ ์ ํด๋นํ๋ ๋ฐ์ดํฐ์ 70%๊ฐ,
test๋ผ๋ ๋ณ์์๋ ๊ฐ ์ด๋์ ์ ํด๋นํ๋ ๋ฐ์ดํฐ์ 30%๊ฐ ํ ๋น๋์๋ค.
3. One-hot encoding ์ ์ฉ
# One-hot encode the 'class' column for the full set and for each split.
one_hot_encoded = pd.get_dummies(tmp['class'])
train_one_hot_encoded = pd.get_dummies(train['class'])
valid_one_hot_encoded = pd.get_dummies(valid['class'])

# Attach the indicator columns, then discard the original categorical column.
data = pd.concat([tmp, one_hot_encoded], axis=1).drop(columns=['class'])
train = pd.concat([train, train_one_hot_encoded], axis=1).drop(columns=['class'])
valid = pd.concat([valid, valid_one_hot_encoded], axis=1).drop(columns=['class'])
valid
์ดํ, ๊ฐ ์ด๋์ ์ ๋ํด one-hot encoding์ ์ ์ฉํ๋๋ ๊ฒฐ๊ณผ๊ฐ ๋ค์๊ณผ ๊ฐ์ด ๋์๋ค.
4. CustomDataset
์ฐ์ ๊ฐ๊ฐ์ ๋ฐ์ดํฐ์ albumentations ๋ผ์ด๋ธ๋ฌ๋ฆฌ๋ฅผ ์ฌ์ฉํ์ฌ data augmentation์ ์งํํ๋ค.
# Training-time augmentation pipeline (albumentations).
# NOTE: the original chained two RandomBrightnessContrast transforms — one with
# default limits at p=0.5 and one with ±0.3 limits at p=1 — so the jitter was
# applied twice on half the samples. A single call keeps the intended effect.
train_transform = A.Compose([
    A.Resize(224, 224),
    A.HorizontalFlip(p=0.5),
    A.RandomBrightnessContrast(brightness_limit=(-0.3, 0.3), contrast_limit=(-0.3, 0.3), p=1),
    A.ChannelShuffle(p=0.2),
    # ImageNet mean/std — matches the pretrained ResNet-50 backbone.
    A.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ToTensorV2()
])

# Validation pipeline: deterministic preprocessing only (no augmentation),
# so metrics are comparable across epochs.
valid_transform = A.Compose([
    A.Resize(224, 224),
    A.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ToTensorV2()
])
์ด๋ train๊ณผ valid๋ฅผ ๋ค๋ฅด๊ฒ ์ฆ๊ฐ์ํจ ์ด์ ๋ ๋ํ์ ์ผ๋ก ๊ณผ์ ํฉ(Overfitting)์ ๋ฐฉ์งํ๊ธฐ ์ํด์์ด๋ค.
์ ์ฒด ๋ฐ์ดํฐ์ ์ ์ฌ์ฉํ๋ฉด ๋ชจ๋ธ์ด ๋ฐ์ดํฐ๋ฅผ ์ธ์ฐ๋ ํ์์ธ ๊ณผ์ ํฉ์ด ๋ฐ์ํ ๊ฐ๋ฅ์ฑ์ด ๋์์ง๋ค.
๋ฐ๋ผ์ ๋ชจ๋ธ์ด ์ผ๋ฐํํ ์ ์๋ ๋ฅ๋ ฅ์ ํฅ์ํ๊ธฐ ์ํด์๋ ์ผ๋ถ ๋ฐ์ดํฐ๋ฅผ ๋ผ์ด๋ด์ด ๊ฒ์ฆ(validation)์ ์ฌ์ฉํ๋ ๊ฒ์ด ์ข๋ค.
์ดํ, ๋ค์๊ณผ ๊ฐ์ด CustomDataset์ ์ ์ํ๋ค.
class CustomDataset(Dataset):
    """Dataset that reads images from disk and applies an optional transform.

    Args:
        file_list: image file paths.
        label_list: per-image labels, indexed in parallel with ``file_list``
            (here: one-hot rows — TODO confirm against the caller).
        transform: optional albumentations ``Compose``; called as
            ``transform(image=...)`` and expected to return a dict with
            an ``"image"`` key.
    """

    def __init__(self, file_list, label_list, transform=None):
        self.file_list = file_list
        self.label_list = label_list
        self.transform = transform

    def __len__(self):
        return len(self.file_list)

    def __getitem__(self, index):
        path = self.file_list[index]
        image = cv2.imread(path)
        if image is None:
            # cv2.imread silently returns None for missing or unreadable
            # files, which would otherwise crash later inside cvtColor with
            # a confusing error message.
            raise FileNotFoundError(f"Could not read image: {path}")
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)  # OpenCV loads BGR; convert to RGB
        if self.transform:
            transformed = self.transform(image=image)
            image = transformed["image"]
        label = self.label_list[index]
        return image, label
์์ฑํ CustomDataset์ ์ด์ฉํด์ Dataloader๋ฅผ ์ ์ฉํ๋ค.
from torch.utils.data import DataLoader

# File paths and one-hot label arrays for each split.
train_files = train["file_name"].tolist()
train_labels = train.drop(["file_name", "index"], axis=1).values
valid_files = valid["file_name"].tolist()
valid_labels = valid.drop(["file_name", "index"], axis=1).values

# Wrap each split with its own augmentation pipeline.
train_dataset = CustomDataset(train_files, train_labels, transform=train_transform)
valid_dataset = CustomDataset(valid_files, valid_labels, transform=valid_transform)

batch_size = 16
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
# Validation never benefits from shuffling; shuffle=False keeps evaluation
# order deterministic (the original shuffled both loaders).
valid_loader = DataLoader(valid_dataset, batch_size=batch_size, shuffle=False)
๋ค์ ๊ฒฐ๊ณผ๋ฅผ ํ์ธํด๋ณด์.
# Sanity-check a single validation batch: image and label tensor shapes.
x, y = next(iter(valid_loader))
print(f'Image Shape: {x.shape}')
print(f'Label Shape: {y.shape}')
์ด๋ฏธ์ง์ shape์ด ์๋ํ๋ ๋๋ก 224 * 224๋ก ๋์ค๊ณ (3์ RGB๋ฅผ ๋ปํ๋ค.), label๋ ์ด๋์ ๊ฐ์์ธ 11๊ฐ๋ก ์ ๋์จ ๊ฒ์ ๋ณผ ์ ์๋ค.
5. Model ์ ์
๋๋ฌด ๊ธธ์ด์ ์์ธํ arcface ๋ชจ๋ธ์ ๊ดํด์๋ ๋ค์ ๋ธ๋ก๊ทธ์์ ์ ๋ฆฌํ๊ธฐ๋ก ํ๊ณ , ์์ฑํ ์ฝ๋๋ง ์ฒจ๋ถํด ๋ณด๋๋ก ํ๊ฒ ๋ค.
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision.models as models
import math
class ArcMarginProduct(nn.Module):
    """ArcFace head: additive angular margin on the target-class logit.

    Computes ``s * cos(theta + m)`` for the ground-truth class and
    ``s * cos(theta)`` for all other classes, where ``theta`` is the angle
    between the L2-normalised input embedding and the class weight vector.

    Args:
        in_features: embedding dimensionality.
        out_features: number of classes.
        scale: ``s``, the radius of the hypersphere the logits live on.
        margin: ``m``, the additive angular margin in radians.
        easy_margin: if True, only apply the margin when ``cos(theta) > 0``.
        device: kept for interface compatibility; ``forward`` places tensors
            on ``input.device`` instead.
    """

    def __init__(self, in_features, out_features, scale=30.0, margin=0.50, easy_margin=False, device='cuda'):
        super(ArcMarginProduct, self).__init__()
        self.in_features = in_features
        self.out_features = out_features
        self.scale = scale
        self.margin = margin
        self.device = device
        self.weight = nn.Parameter(torch.FloatTensor(out_features, in_features))
        nn.init.xavier_uniform_(self.weight)
        self.easy_margin = easy_margin
        # Precomputed constants for cos(theta + m) and the numeric fallback
        # used when theta + m would exceed pi.
        self.cos_m = math.cos(margin)
        self.sin_m = math.sin(margin)
        self.th = math.cos(math.pi - margin)
        self.mm = math.sin(math.pi - margin) * margin

    def forward(self, input, label):
        """Return margin-adjusted logits of shape (batch, out_features).

        Args:
            input: (batch, in_features) embeddings.
            label: (batch,) integer class indices.
        """
        # Cosine of the angle between each embedding and each class weight.
        cosine = F.linear(F.normalize(input), F.normalize(self.weight))
        # Clamp before sqrt: floating-point error can push 1 - cos^2 slightly
        # below zero, which produced NaNs in the original implementation.
        sine = torch.sqrt((1.0 - torch.pow(cosine, 2)).clamp(0, 1))
        phi = cosine * self.cos_m - sine * self.sin_m  # cos(theta + m)
        if self.easy_margin:
            phi = torch.where(cosine > 0, phi, cosine)
        else:
            # If theta + m > pi, fall back to a monotonic linear penalty.
            phi = torch.where(cosine > self.th, phi, cosine - self.mm)
        # The margin applies only to the ground-truth class logit.
        one_hot = torch.zeros(cosine.size(), device=input.device)
        one_hot.scatter_(1, label.view(-1, 1).long(), 1)
        output = (one_hot * phi) + ((1.0 - one_hot) * cosine)
        output *= self.scale
        return output
class CustomArcFaceModel(nn.Module):
    """ResNet-50 backbone with an ArcFace margin head.

    With labels, ``forward`` returns margin logits for training; without
    labels it returns L2-normalised embeddings for similarity search.
    """

    def __init__(self, num_classes, device='cuda'):
        super(CustomArcFaceModel, self).__init__()
        self.device = device
        # ResNet-50 up to and including global average pooling; the final
        # fully-connected classifier layer is dropped.
        self.backbone = nn.Sequential(*list(models.resnet50(pretrained=True).children())[:-1])
        self.arc_margin_product = ArcMarginProduct(2048, num_classes, device=self.device)
        # NOTE(review): this overwrites the xavier init already performed
        # inside ArcMarginProduct.__init__ — kept to preserve behaviour,
        # but one of the two initialisations is redundant.
        nn.init.kaiming_normal_(self.arc_margin_product.weight)

    def forward(self, x, labels=None):
        """Return ArcFace logits when ``labels`` is given, else embeddings.

        Args:
            x: input image batch.
            labels: optional (batch,) integer class indices.
        """
        features = self.backbone(x)
        features = F.normalize(features)
        # Flatten pooled feature maps to vectors; assumes backbone emits
        # (batch, 2048, 1, 1) for 224x224 inputs — TODO confirm.
        features = features.view(features.size(0), -1)
        if labels is not None:
            logits = self.arc_margin_product(features, labels)
            return logits
        return features

    def cosine_similarity(self, x1, x2):
        """Cosine similarity between two 1-D embedding tensors.

        Uses F.cosine_similarity, whose eps guard avoids the division by
        zero the original dot/norm formula hit for zero vectors.
        """
        return F.cosine_similarity(x1, x2, dim=0)

    def find_most_similar_celebrity(self, user_face_embedding, celebrity_face_embeddings):
        """Return (index, similarity) of the closest celebrity embedding.

        Linear scan over ``celebrity_face_embeddings``; returns (-1, -1)
        when the sequence is empty.
        """
        max_similarity = -1
        most_similar_celebrity_index = -1
        for i, celebrity_embedding in enumerate(celebrity_face_embeddings):
            similarity = self.cosine_similarity(user_face_embedding, celebrity_embedding)
            if similarity > max_similarity:
                max_similarity = similarity
                most_similar_celebrity_index = i
        return most_similar_celebrity_index, max_similarity
์ด๋ฏธ์ง๊ฐ ์ ๋ ฅ๋์ ๋, ์ ์ฝ๋๋ฅผ ๊ฑฐ์นจ์ผ๋ก ๋ค์๊ณผ ๊ฐ์ ๊ณผ์ ์ด ์ผ์ด๋๋ค:
- ์ ๋ ฅ ์ด๋ฏธ์ง๋ฅผ CustomArcFaceModel์ forward ๋ฉ์๋์ ์ ๋ฌํ๋ค.
- ์ ๋ ฅ ์ด๋ฏธ์ง๋ self.backbone์ผ๋ก ์ง์ ๋ ๋ฐฑ๋ณธ ๋ชจ๋ธ(์ฌ๊ธฐ์๋ ResNet-50)์ ํต๊ณผํ๋ค.
- ๋ฐฑ๋ณธ ๋ชจ๋ธ์ ์ด๋ฏธ์ง๋ฅผ ํน์ง ๋งต(feature map)์ผ๋ก ๋ณํํ๋ค.
- ํน์ง ๋งต์ ์ ๊ทํ(normalization)๋๋ค.
- ์ ๊ทํ๋ ํน์ง ๋งต์ ๋ฒกํฐ๋ก ํผ์ณ์ง๋ค.
- ํผ์ณ์ง ํน์ง ๋ฒกํฐ๋ self.arc_margin_product์ธ ArcMarginProduct ๋ ์ด์ด๋ฅผ ํต๊ณผํ๋ค.
- ArcMarginProduct๋ ์ ๋ ฅ ๋ฒกํฐ์ ๋ ์ด๋ธ์ ๋ฐ์์, cosine ์ ์ฌ๋๋ฅผ ๊ณ์ฐํ๊ณ ArcFace ์์ค ํจ์๋ฅผ ์ ์ฉํ์ฌ ๋ก์ง(logits)์ ๊ณ์ฐํ๋ค.
- ๊ณ์ฐ๋ ๋ก์ง์ ๋ฐํ๋๋ค.
๋ฐ๋ผ์, CustomArcFaceModel ํด๋์ค๋ฅผ ์ฌ์ฉํ๋ฉด ์ ๋ ฅ ์ด๋ฏธ์ง๊ฐ ๋ฐฑ๋ณธ ๋ชจ๋ธ์ธ ResNet-50์ ํต๊ณผํ์ฌ ํน์ง ๋ฒกํฐ๋ก ๋ณํ๋๊ณ , ์ด ํน์ง ๋ฒกํฐ๋ ArcMarginProduct ๋ ์ด์ด๋ฅผ ํตํด ๋ก์ง์ผ๋ก ๋ณํ๋๋ค.
6. ๋ชจ๋ธ train
train ์, epoch๋ง๋ค ์ถ๋ ฅ๊ฐ์ด ๋์ค๋๋ก ์ฝ๋๋ฅผ ์์ฑํ๋ค.
from tqdm import tqdm
def train(model, optimizer, criterion, train_loader, valid_loader, device, epochs, scheduler=None):
    """Train ``model`` and keep the best checkpoint by validation accuracy.

    Args:
        model: module whose forward accepts ``(x, labels)`` (ArcFace head).
        optimizer: optimizer over ``model`` parameters.
        criterion: loss on (logits, class-index targets).
        train_loader / valid_loader: yield (images, one-hot labels).
        device: target device string or torch.device.
        epochs: number of epochs to run.
        scheduler: optional LR scheduler stepped once per epoch with the
            validation accuracy (e.g. ReduceLROnPlateau(mode='max')); the
            original code built such a scheduler but never stepped it.

    Returns:
        The trained model. Side effect: best weights saved to "arcface.pth".
    """
    model.to(device)
    best_accuracy = 0.0
    for epoch in range(epochs):
        # ---- training pass ----
        model.train()
        train_loss = 0.0
        train_corrects = 0
        for x, y in tqdm(train_loader, desc=f"Epoch {epoch + 1} - Training"):
            x = x.to(device)
            # Labels arrive one-hot encoded; convert to class indices.
            y = torch.argmax(y, dim=1).to(device)
            optimizer.zero_grad()
            output = model(x, y)  # labels are needed to apply the ArcFace margin
            loss = criterion(output, y)
            loss.backward()
            optimizer.step()
            _, preds = torch.max(output, 1)
            train_loss += loss.item() * x.size(0)
            # preds already holds integer class indices, so compare directly
            # (the original rounded them, a no-op on argmax output).
            train_corrects += torch.sum(preds == y.data).float()
        train_loss = train_loss / len(train_loader.dataset)
        train_accuracy = train_corrects.double() / len(train_loader.dataset)

        # ---- validation pass ----
        model.eval()
        valid_loss = 0.0
        valid_corrects = 0
        with torch.no_grad():
            for x, y in tqdm(valid_loader, desc=f"Epoch {epoch + 1} - Validation"):
                x = x.to(device)
                y = torch.argmax(y, dim=1).to(device)
                output = model(x, y)
                loss = criterion(output, y)
                _, preds = torch.max(output, 1)
                valid_loss += loss.item() * x.size(0)
                valid_corrects += torch.sum(preds == y.data).float()
        valid_loss = valid_loss / len(valid_loader.dataset)
        valid_accuracy = valid_corrects.double() / len(valid_loader.dataset)

        print(f"Epoch {epoch + 1}/{epochs} - Train Loss: {train_loss:.4f}, Train Acc: {train_accuracy:.4f}, Valid Loss: {valid_loss:.4f}, Valid Acc: {valid_accuracy:.4f}")

        if scheduler is not None:
            # ReduceLROnPlateau-style schedulers monitor the tracked metric.
            scheduler.step(valid_accuracy)

        # Checkpoint whenever validation accuracy improves.
        if valid_accuracy > best_accuracy:
            best_accuracy = valid_accuracy
            torch.save(model.state_dict(), "arcface.pth")
    return model
# Hyperparameter settings
num_classes = 11 # number of target classes (CDE, CHJ, HAR, IDH, IJH, JJJ, JSI, OJY, SHE, SHG, SMO)
embedding_size = 2048  # NOTE(review): defined but never used below; the model hard-codes 2048
learning_rate = CFG['LEARNING_RATE']  # CFG is a config dict defined elsewhere in the project
epochs = 150
# Build the ArcFace model
model = CustomArcFaceModel(num_classes)
# Optimizer and loss function
optimizer = torch.optim.Adam(model.parameters(), lr = learning_rate)
criterion = nn.CrossEntropyLoss()
# NOTE(review): this scheduler is created but never passed to train(), so it is never stepped
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='max', factor=0.5, patience=2,threshold_mode='abs',min_lr=1e-8, verbose=True)
# Run training (saves best weights to arcface.pth inside train())
trained_model = train(model, optimizer, criterion, train_loader, valid_loader, device, epochs)
์ดํ, ํ์ดํผํ๋ผ๋ฏธํฐ๋ฅผ ํ ๋นํ์ฌ ๋ชจ๋ธ์ ํ์ต์์ผฐ๋๋, ๋ค์๊ณผ ๊ฐ์ด ์ต์ข ๊ฒฐ๊ณผ๊ฐ ๋์๋ค.
๊ฒฐ๊ณผ๋ฅผ ๋ณด๋ฉด, train accuracy๋ ๋์ ๊ฒ์ ๋นํด valid accuracy๊ฐ ๋ฎ์ ๊ฒ์ ๋ณผ ์ ์๋ค. ์ด๋ ๊ณผ์ ํฉ ํ์์ผ๋ก ํ๋จํ ์ ์๋ค.
ํ์ดํผํ๋ผ๋ฏธํฐ๋ฅผ ์กฐ๊ธ ๋ ์กฐ์ ํ์ฌ train ์์ผ๋ณผ ์์ ์ด๋ค.
'๐ป ํ๋ก์ ํธ > ๐งธ TOY-PROJECTS' ์นดํ ๊ณ ๋ฆฌ์ ๋ค๋ฅธ ๊ธ
[DeepLook] 6. ์ต์ข ๊ฒฐ๊ณผ๋ฌผ, ์ดํ ์๋ฌ ํธ๋ค๋ง๊ณผ ๋ง๋ฌด๋ฆฌ (2) | 2023.06.21 |
---|---|
[DeepLook] 5. ๋ฐฑ์๋ ์ฐ๊ฒฐ (0) | 2023.06.21 |
[DeepLook] 3. ์ ์ฒ๋ฆฌ (haar-cascade ์๊ณ ๋ฆฌ์ฆ) (0) | 2023.06.20 |
[DeepLook] 2. AI ์์ ์ค๊ณ ๊ณผ์ / ํฌ๋กค๋ง (0) | 2023.06.20 |
[DeepLook] 1. ์์ํ๊ฒ ๋ ๊ณ๊ธฐ / ํ๋ก์ ํธ IA (0) | 2023.06.20 |