Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add PortraitNet. #1132

Merged
merged 14 commits into from
Aug 4, 2021
20 changes: 20 additions & 0 deletions configs/portraitnet/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
# PortraitNet: Real-time Portrait Segmentation Network for Mobile Device

## Reference

> Song-Hai Zhang, Xin Dong, Jia Li, Ruilong Li, Yong-Liang Yang. "PortraitNet: Real-time portrait segmentation network for mobile device". @ CAD&Graphics 2019.

## Usage
[https://aistudio.baidu.com/aistudio/projectdetail/1754799](https://aistudio.baidu.com/aistudio/projectdetail/1754799)

## Dataset
[https://pan.baidu.com/s/15uBpR7zFF2zpUccoq5pQYg](https://pan.baidu.com/s/15uBpR7zFF2zpUccoq5pQYg)

password: ajcs

## Performance

| Model | Backbone | Dataset | Resolution | Training Iters | mIoU |
|-|-|-|-|-|-|
|PortraitNet|MobileNetV2|EG1800|224x224|46000|96.92%|
|PortraitNet|MobileNetV2|Supervise.ly|224x224|60000|93.94%|
78 changes: 78 additions & 0 deletions configs/portraitnet/portraitnet_eg1800_224x224_46k.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
# PortraitNet trained on the EG1800 portrait dataset, 224x224 input, 46k iters.
batch_size: 64
iters: 46000

train_dataset:
  type: EG1800
  dataset_root: data/EG1800
  # Transforms applied once, shared by both network inputs.
  common_transforms:
    - type: RandomAffine
      max_rotation: 45
      min_scale_factor: 0.5
      max_scale_factor: 1.5
      size: [ 224, 224 ]
      translation_offset: 56
    - type: RandomHorizontalFlip
  # Branch 1: the "clean" view — only normalization.
  transforms1:
    - type: Normalize
      mean: [0.485, 0.458, 0.408]
      std: [0.23, 0.23, 0.23]
  # Branch 2: the heavily-augmented view used for the consistency (KL) loss.
  transforms2:
    - type: RandomDistort
      brightness_range: 0.6
      contrast_range: 0.4
      saturation_range: 0.6
      hue_prob: 0.0
      sharpness_range: 0.2
      sharpness_prob: 0.5
    - type: RandomBlur
      prob: 0.5
      blur_type: random
    - type: RandomNoise
    - type: Normalize
      mean: [ 0.485, 0.458, 0.408 ]
      std: [ 0.23, 0.23, 0.23 ]
  mode: train

val_dataset:
  type: EG1800
  dataset_root: data/EG1800
  common_transforms:
    # Keep aspect ratio, pad to the target size instead of distorting.
    - type: ScalePadding
      target_size: [ 224, 224 ]
      im_padding_value: [127.5, 127.5, 127.5]
      label_padding_value: 0
    - type: Normalize
      mean: [0.485, 0.458, 0.408]
      std: [0.23, 0.23, 0.23]
  # No per-branch augmentation at eval time.
  transforms1: null
  transforms2: null
  mode: val

optimizer:
  type: adam
  weight_decay: 5.0e-4

# Exponential-style decay: lr *= 0.95 every 460 iters (1% of total).
lr_scheduler:
  type: StepDecay
  learning_rate: 0.001
  step_size: 460
  gamma: 0.95

loss:
  types:
    - type: CrossEntropyLoss
    - type: CrossEntropyLoss
    # Boundary loss trained against the edge mask labels.
    - type: FocalLoss
      edge_label: True
    # Consistency loss between the two augmented views.
    - type: KLLoss
  coef: [1, 1, 0.3, 2]

model:
  type: PortraitNet
  backbone:
    type: MobileNetV2
    channel_ratio: 1.0
    min_channel: 16
    pretrained: null
  add_edge: True
  num_classes: 2
50 changes: 50 additions & 0 deletions configs/portraitnet/portraitnet_supervisely_224x224_60k.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
# PortraitNet on the Supervise.ly portrait dataset; inherits the model,
# optimizer, lr schedule and losses from the EG1800 config and overrides
# only the datasets and iteration count.
_base_: './portraitnet_eg1800_224x224_46k.yml'
batch_size: 64
iters: 60000

train_dataset:
  type: SUPERVISELY
  dataset_root: data/Supervisely_face
  # Transforms applied once, shared by both network inputs.
  common_transforms:
    - type: RandomAffine
      max_rotation: 45
      min_scale_factor: 0.5
      max_scale_factor: 1.5
      size: [ 224, 224 ]
      translation_offset: 56
    - type: RandomHorizontalFlip
  # Branch 1: the "clean" view — only normalization.
  transforms1:
    - type: Normalize
      mean: [0.485, 0.458, 0.408]
      std: [0.23, 0.23, 0.23]
  # Branch 2: the heavily-augmented view used for the consistency (KL) loss.
  transforms2:
    - type: RandomDistort
      brightness_range: 0.6
      contrast_range: 0.4
      saturation_range: 0.6
      hue_prob: 0.0
      sharpness_range: 0.2
      sharpness_prob: 0.5
    - type: RandomBlur
      prob: 0.5
      blur_type: random
    - type: RandomNoise
    - type: Normalize
      mean: [ 0.485, 0.458, 0.408 ]
      std: [ 0.23, 0.23, 0.23 ]
  mode: train

val_dataset:
  type: SUPERVISELY
  dataset_root: data/Supervisely_face
  common_transforms:
    # Keep aspect ratio, pad to the target size instead of distorting.
    - type: ScalePadding
      target_size: [ 224, 224 ]
      im_padding_value: [127.5, 127.5, 127.5]
      label_padding_value: 0
    - type: Normalize
      mean: [0.485, 0.458, 0.408]
      std: [0.23, 0.23, 0.23]
  # No per-branch augmentation at eval time.
  transforms1: null
  transforms2: null
  mode: val
6 changes: 5 additions & 1 deletion paddleseg/core/train.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,8 +40,12 @@ def loss_computation(logits_list, labels, losses, edges=None):
logits = logits_list[i]
loss_i = losses['types'][i]
# Whether to use edges as labels According to loss type.
if loss_i.__class__.__name__ in ('BCELoss', ) and loss_i.edge_label:
if loss_i.__class__.__name__ in ('BCELoss',
'FocalLoss') and loss_i.edge_label:
loss_list.append(losses['coef'][i] * loss_i(logits, edges))
txyugood marked this conversation as resolved.
Show resolved Hide resolved
elif loss_i.__class__.__name__ in ("KLLoss", ):
loss_list.append(losses['coef'][i] * loss_i(
logits_list[0], logits_list[1].detach()))
else:
loss_list.append(losses['coef'][i] * loss_i(logits, labels))
return loss_list
Expand Down
3 changes: 3 additions & 0 deletions paddleseg/cvlibs/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -176,6 +176,9 @@ def learning_rate(self) -> paddle.optimizer.lr.LRScheduler:
elif decay_type == 'piecewise':
values = _learning_rate
return paddle.optimizer.lr.PiecewiseDecay(values=values, **args)
elif decay_type == 'stepdecay':
lr = _learning_rate
return paddle.optimizer.lr.StepDecay(lr, **args)
else:
raise RuntimeError('Only poly and piecewise decay support.')

Expand Down
2 changes: 2 additions & 0 deletions paddleseg/datasets/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,3 +19,5 @@
from .optic_disc_seg import OpticDiscSeg
from .pascal_context import PascalContext
from .mini_deep_globe_road_extraction import MiniDeepGlobeRoadExtraction
from .eg1800 import EG1800
from .supervisely import SUPERVISELY
117 changes: 117 additions & 0 deletions paddleseg/datasets/eg1800.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,117 @@
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import os
import copy

import cv2
import numpy as np

from paddleseg.datasets import Dataset
from paddleseg.cvlibs import manager
from paddleseg.transforms import Compose
import paddleseg.transforms.functional as F


@manager.DATASETS.add_component
class EG1800(Dataset):
    """
    EG1800 dataset `http://xiaoyongshen.me/webpage_portrait/index.html`.

    Each item yields two differently-augmented views of the same portrait,
    concatenated along the channel axis — the paired inputs PortraitNet
    needs for its consistency (KL) loss — plus the segmentation label and,
    in train mode, a binary edge mask for the boundary loss.

    Args:
        common_transforms (list): A list of common image transformations
            applied to both inputs of PortraitNet.
        transforms1 (list|None): A list of image transformations for the first
            input of PortraitNet. May be None (e.g. in val mode).
        transforms2 (list|None): A list of image transformations for the second
            input of PortraitNet. May be None (e.g. in val mode).
        dataset_root (str, optional): The EG1800 dataset directory. Default: None.
        mode (str, optional): A subset of the entire dataset. It should be one
            of ('train', 'val'). Default: 'train'.
        edge (bool, optional): Whether to compute edge while training. Default: False
    """
    NUM_CLASSES = 2

    def __init__(self,
                 common_transforms,
                 transforms1,
                 transforms2,
                 dataset_root=None,
                 mode='train',
                 edge=False):
        self.dataset_root = dataset_root
        self.common_transforms = Compose(common_transforms)
        self.transforms = self.common_transforms
        # Always define the attributes so access never raises AttributeError
        # when no per-branch transforms are configured (e.g. val mode).
        self.transforms1 = (Compose(transforms1, to_rgb=False)
                            if transforms1 is not None else None)
        self.transforms2 = (Compose(transforms2, to_rgb=False)
                            if transforms2 is not None else None)
        self.ignore_index = 255
        self.mode = mode.lower()
        self.num_classes = self.NUM_CLASSES
        self.input_width = 224
        self.input_height = 224

        if self.mode == 'train':
            path = os.path.join(dataset_root, 'eg1800_train.txt')
        else:
            path = os.path.join(dataset_root, 'eg1800_test.txt')
        with open(path, 'r') as f:
            # Strip newlines and drop blank lines so no bogus directory
            # paths (plain 'Images'/'Labels') end up in the file list.
            files = [line.strip() for line in f if line.strip()]
        img_files = [
            os.path.join(dataset_root, 'Images', file) for file in files
        ]
        label_files = [
            os.path.join(dataset_root, 'Labels', file) for file in files
        ]

        self.file_list = [[
            img_path, label_path
        ] for img_path, label_path in zip(img_files, label_files)]

    def __getitem__(self, item):
        image_path, label_path = self.file_list[item]
        im = cv2.imread(image_path)
        label = cv2.imread(label_path, 0)
        # Collapse any label id other than {0, 1} to background.
        label[label > 1] = 0

        if self.mode == "val":
            common_im, label = self.common_transforms(im=im, label=label)
            # Compose output is CHW; reversing axis 0 swaps channel order
            # (RGB => BGR). Both network inputs are identical in val mode.
            im = np.float32(common_im[::-1, :, :])
            im_aug = copy.deepcopy(im)
        else:
            common_im, label = self.common_transforms(im=im, label=label)
            # Back to HWC so the per-branch transforms can be applied.
            common_im = np.transpose(common_im, [1, 2, 0])
            # Two differently-augmented views of the same image.
            im, _ = self.transforms1(common_im)
            im_aug, _ = self.transforms2(common_im)

            im = np.float32(im[::-1, :, :])  # RGB => BGR
            im_aug = np.float32(im_aug[::-1, :, :])  # RGB => BGR

        label = cv2.resize(
            np.uint8(label), (self.input_width, self.input_height),
            interpolation=cv2.INTER_NEAREST)

        # Soften the mask border with a box blur, then re-binarize.
        # (Values are integers, so ">= 0.5" is equivalent to ">= 1".)
        label = np.uint8(cv2.blur(label, (5, 5)))
        label[label >= 0.5] = 1
        label[label < 0.5] = 0

        edge_mask = F.mask_to_binary_edge(
            label, radius=4, num_classes=self.num_classes)
        edge_mask = np.transpose(edge_mask, [1, 2, 0]).squeeze(axis=-1)
        # Stack the two views channel-wise: (6, H, W).
        im = np.concatenate([im_aug, im])
        if self.mode == "train":
            return im, label, edge_mask
        else:
            return im, label
Loading