Skip to content

add data modules for det and rec, raw training pipeline tested #3

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 16 commits into from
Feb 24, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,8 @@ __pycache__/
*.py[cod]
*$py.class

.swp
.swo
*.swp
*.swo

# C extensions
*.so
Expand Down
139 changes: 82 additions & 57 deletions configs/det/db_r50_icdar15.yaml
Original file line number Diff line number Diff line change
@@ -1,87 +1,112 @@
system:
mode: 0 # 0 for graph mode, 1 for pynative mode in MindSpore
distribute: False
amp_level: 'O0'
seed: 42

model:
type: det
transform: null
backbone:
name: det_resnet50
pretrained: False
neck:
name: FPN
name: DBFPN
out_channels: 256
#use_asf: True
bias: False
use_asf: False # enable it for DB++
head:
name: ConvHead
out_channels: 2
name: DBHead
k: 50
bias: False
adaptive: True
serial: False

loss:
name: L1BalanceCELoss
eps: 0.000001
l1_scale: 10
bce_scale: 5
bce_replace: bceloss

scheduler:
name: "cosine_decay"
min_lr: 0.0
lr: 0.1
warmup_epochs: 4
decay_epochs: 96
scheduler: "cosine_decay"
min_lr: 0.00001
lr: 0.001
num_epochs: 300
warmup_epochs: 0
decay_epochs: 280

optimizer:
name: "momentum"
opt: "momentum"
filter_bias_and_bn: True
momentum: 0.9
weight_decay: 0.0001
loss_scale: 1024
loss_scale: 1.0
use_nesterov: False

train:
dataset_sink_mode: False
dataset:
type: DetDataset
data_dir: /data/ocr_datasets/ic15/text_localization/train
label_files: /data/ocr_datasets/ic15/text_localization/train/train_icdar15_label.txt
#data_dir: /Users/Samit/Data/datasets/ic15/det/train
#label_files: /Users/Samit/Data/datasets/ic15/det/train/train_icdar2015_label.txt
sample_ratios: [1.0]
shuffle: True,
shuffle: True
transform_pipeline:
- DecodeImage:
img_mode: BGR,
to_float32: False
- DetLabelEncode:
- MZResizeByGrid:
#denominator: 32
divisor: 32
transform_polys: True # originally in modelzoo, it doesn't transform polys
- MZRandomScaleByShortSide:
short_side: 736
- IaaAugment:
augmenter_args:
- {'type': 'Affine', 'args': {'rotate': [-10, 10]}}
- {'type': 'Fliplr', 'args': {'p': 0.5}}
- MZRandomCropData:
max_tries: 100
min_crop_side_ratio: 0.1
crop_size: [640, 640]
- MZResizeByGrid:
#denominator: 32
divisor: 32
transform_polys: True
#- MakeShrinkMap:
- MZMakeSegDetectionData:
min_text_size: 8
shrink_ratio: 0.4
#- 'MakeBorderMap':
- MZMakeBorderMap:
shrink_ratio: 0.4
thresh_min: 0.3
thresh_max: 0.7
- MZRandomColorAdjust:
brightness: 32.0 / 255
saturation: 0.5
to_numpy: True
#{'MZIrregularNormToCHW': None},
- NormalizeImage:
bgr_to_rgb: True
is_hwc: True
mean : [123.675, 116.28, 103.53]
std : [58.395, 57.12, 57.375]
- ToCHWImage:
output_keys: ['img_path', 'image']
- DecodeImage:
img_mode: BGR
to_float32: False
- DetLabelEncode:
- MZResizeByGrid:
#denominator: 32
divisor: 32
transform_polys: True # originally in modelzoo, it doesn't transform polys
- MZRandomScaleByShortSide:
short_side: 736
- IaaAugment:
augmenter_args:
- {'type': 'Affine', 'args': {'rotate': [-10, 10]}}
- {'type': 'Fliplr', 'args': {'p': 0.5}}
- MZRandomCropData:
max_tries: 100
min_crop_side_ratio: 0.1
crop_size: [640, 640]
- MZResizeByGrid:
#denominator: 32
divisor: 32
transform_polys: True
#- MakeShrinkMap:
- MZMakeSegDetectionData:
min_text_size: 8
shrink_ratio: 0.4
#- 'MakeBorderMap':
- MZMakeBorderMap:
shrink_ratio: 0.4
thresh_min: 0.3
thresh_max: 0.7
- MZRandomColorAdjust:
brightness: 0.1255 #32.0 / 255
saturation: 0.5
to_numpy: True
#{'MZIrregularNormToCHW': None},
- NormalizeImage:
bgr_to_rgb: True
is_hwc: True
mean : [123.675, 116.28, 103.53]
std : [58.395, 57.12, 57.375]
- ToCHWImage:
# the order of the dataloader output list: network inputs first, then the labels for the loss function, then optional data for debugging/visualization
output_keys: ['image', 'shrink_map', 'shrink_mask', 'threshold_map', 'threshold_mask'] #'img_path']
num_keys_to_net: 1 # num inputs for network forward func
#keys_for_loss: 4 # num labels for loss func

loader:
shuffle: True # TODO: tbc
batch_size: 8
batch_size: 16
drop_remainder: True
max_rowsize: 6
num_workers: 4
num_workers: 2

32 changes: 32 additions & 0 deletions configs/det/db_r50_icdar15.yaml.bakup
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@



model:
type: det
transform: null
backbone:
name: det_resnet50
pretrained: False
neck:
name: FPN
out_channels: 256
#use_asf: True
head:
name: ConvHead
out_channels: 2
k: 50

scheduler:
name: "cosine_decay"
min_lr: 0.0
lr: 0.1
warmup_epochs: 4
decay_epochs: 96

optimizer:
name: "momentum"
filter_bias_and_bn: True
momentum: 0.9
weight_decay: 0.0001
loss_scale: 1024
use_nesterov: False
89 changes: 89 additions & 0 deletions configs/rec/crnn_icdar15.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,89 @@
system:
mode: 1 # 0 for graph mode, 1 for pynative mode in MindSpore
distribute: False
amp_level: 'O0'
seed: 42

common:
character_dict_path: &character_dict_path #mindocr/utils/dict/en_dict.txt
num_classes: &num_classes 37 # num_chars_in_dict+1, TODO: retrieve it from dict or check correctness
max_text_len: &max_text_len 23
infer_mode: &infer_mode False
use_space_char: &use_space_char False
batch_size: &batch_size 32

model:
type: rec
transform: null
backbone:
name: rec_vgg7 #resnet34@mindcv
pretrained: False
neck:
name: RNNEncoder
hidden_size: 256
head:
name: CTCHead
out_channels: *num_classes

loss:
name: CTCLoss
pred_seq_len: 24 # TODO: this should be retrieved from the network output shape.
max_label_len: *max_text_len # this value should be smaller than pred_seq_len
batch_size: *batch_size

scheduler:
scheduler: "cosine_decay"
min_lr: 0.000001
lr: 0.001
num_epochs: 100
warmup_epochs: 10
decay_epochs: 90

optimizer:
opt: "momentum"
filter_bias_and_bn: True
momentum: 0.9
weight_decay: 0.0001
loss_scale: 1.0
use_nesterov: True

train:
dataset_sink_mode: False
dataset:
type: RecDataset
#data_dir: /Users/Samit/Data/datasets/ic15/rec/ch4_training_word_images_gt
#label_files: /Users/Samit/Data/datasets/ic15/rec/rec_gt_train.txt
data_dir: /data/ocr_datasets/ic15/word_recognition/ch4_training_word_images_gt
label_files: /data/ocr_datasets/ic15/word_recognition/rec_gt_train.txt
sample_ratios: [1.0]
shuffle: True
transform_pipeline:
- DecodeImage:
img_mode: BGR
to_float32: False
- RecCTCLabelEncode:
max_text_len: *max_text_len
character_dict_path: *character_dict_path
use_space_char: *use_space_char
- RecResizeImg: # different from paddle (paddle converts the image from HWC to CHW and rescales it to [-1, 1] after resize).
image_shape: [32, 100] # H, W
infer_mode: *infer_mode
character_dict_path: *character_dict_path
padding: True # aspect ratio will be preserved if true.
- NormalizeImage: # different from paddle (paddle wrongly normalizes the BGR image with the RGB mean/std from ImageNet for det, and simply rescales to [-1, 1] in rec).
bgr_to_rgb: True
is_hwc: True
mean : [127.0, 127.0, 127.0]
std : [127.0, 127.0, 127.0]
- ToCHWImage:
# the order of the dataloader output list: network inputs first, then the labels for the loss function, then optional data for debugging/visualization
output_keys: ['image', 'text_seq'] #, 'length'] #'img_path']
num_keys_to_net: 1 # num inputs for network forward func
#keys_for_loss: 4 # num labels for loss func

loader:
shuffle: True # TODO: tbc
batch_size: *batch_size
drop_remainder: True
max_rowsize: 6
num_workers: 2
Binary file renamed mindocr/data/.DS_Store → mindocr/.DS_Store
Binary file not shown.
Loading