Skip to content

Add Evaluation #31

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits into from
Mar 8, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 15 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,14 @@ A toolbox of OCR models, algorithms, and pipelines based on MindSpore
- Performance tracking during training (by default, results are saved in `ckpt_save_dir/result.log`)


## Change log

- 3.8
1. Add evaluation script with arg `ckpt_load_path`
2. Arg `ckpt_save_dir` is moved from `system` to `train` in yaml.
3. Add drop_overflow_update control


## Quick Start (for dev)

### Data preparation
Expand All @@ -31,6 +39,13 @@ python tools/train.py --config configs/det/db_r50_icdar15.yaml
python tools/train.py --config configs/rec/crnn_icdar15.yaml
```

### Evaluation

``` python
python tools/eval.py --config configs/det/db_r50_icdar15.yaml
```



## Build and Test A New Model

Expand Down
7 changes: 4 additions & 3 deletions configs/rec/crnn_icdar15.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@ system:
amp_level: 'O0'
seed: 42
val_while_train: True
ckpt_save_dir: './tmp_rec'
drop_overflow_update: True

common:
Expand Down Expand Up @@ -62,6 +61,7 @@ optimizer:
#use_nesterov: True

train:
ckpt_save_dir: './tmp_rec'
dataset_sink_mode: False
dataset:
type: RecDataset
Expand Down Expand Up @@ -99,10 +99,11 @@ train:
shuffle: True # TODO: tbc
batch_size: *batch_size
drop_remainder: True
max_rowsize: 12
num_workers: 2
max_rowsize: 16
num_workers: 10

eval:
ckpt_load_path: './tmp_rec/best.ckpt'
dataset_sink_mode: False
dataset:
type: RecDataset
Expand Down
1 change: 0 additions & 1 deletion mindocr/models/base_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,6 @@ def __init__(self, config: dict):
head_name = config.head.pop('name')
self.head = build_head(head_name, in_channels=self.neck.out_channels, **config.head)


self.model_name = f'{backbone_name}_{neck_name}_{head_name}'

def construct(self, x):
Expand Down
17 changes: 16 additions & 1 deletion mindocr/models/builder.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
build models
'''
from typing import Union
from mindspore import load_checkpoint, load_param_into_net
from ._registry import model_entrypoint, list_models, is_model
from .base_model import BaseModel

Expand All @@ -20,7 +21,13 @@ def build_model(config: Union[dict, str], **kwargs): #config: Union[dict,str]):
backbone: dict, a dictionary containing the backbone config, the available keys are defined in backbones/builder.py
neck: dict,
head: dict,
kwargs: if config is a str of model name, kwargs servers as the args for the model.
kwargs: if config is a str of model name, kwargs contains the args for the model.


Example:
>>> net = build_model(cfg['model'])
>>> net = build_model(cfg['model'], ckpt_load_path='./r50_fpn_dbhead.ckpt') # build network and load checkpoint
>>> net = build_model('dbnet_r50', pretrained=True)

'''
if isinstance(config, str):
Expand All @@ -43,5 +50,13 @@ def build_model(config: Union[dict, str], **kwargs): #config: Union[dict,str]):
network = BaseModel(config)
else:
raise ValueError('Type error for config')

# load checkpoint
if 'ckpt_load_path' in kwargs:
ckpt_path = kwargs['ckpt_load_path']
if ckpt_path not in ['', None]:
print(f'INFO: Loading checkpoint from {ckpt_path}')
params = load_checkpoint(ckpt_path)
load_param_into_net(network, params)

return network
2 changes: 1 addition & 1 deletion mindocr/utils/seed.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ def set_seed(seed=42):
'''
seed: seed int

Note: to ensure model init stability, rank_id is removed for seed.
Note: to ensure model init stability, rank_id is removed from seed.
'''
#if rank is None:
# rank = 0
Expand Down
87 changes: 87 additions & 0 deletions tools/eval.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,87 @@
'''
Model evaluation
'''
import sys
sys.path.append('.')

import os
import yaml
import argparse
from addict import Dict

import mindspore as ms
from mindspore import nn

from mindocr.data import build_dataset
from mindocr.models import build_model
from mindocr.postprocess import build_postprocess
from mindocr.metrics import build_metric
from mindocr.utils.callbacks import Evaluator

def main(cfg):
    '''
    Evaluate a checkpoint on the dataset described by the `eval` section of the config.

    Args:
        cfg: configuration loaded from the yaml file, accessed by attribute.
            This function reads `cfg.system` (mode, distribute, amp_level),
            `cfg.eval` (dataset, loader, ckpt_load_path), `cfg.model`,
            `cfg.postprocess` and `cfg.metric`.

    Raises:
        ValueError: if `ckpt_load_path` is not present in `cfg.eval`.

    The resulting measures are printed to stdout; nothing is returned.
    '''
    # env init
    ms.set_context(mode=cfg.system.mode)
    if cfg.system.distribute:
        print("WARNING: Distributed mode blocked. Evaluation only runs in standalone mode.")

    # Validate the config before building the data pipeline so a missing
    # checkpoint path fails fast. An explicit raise is used instead of `assert`
    # because assertions are stripped when Python runs with -O.
    if 'ckpt_load_path' not in cfg.eval:
        raise ValueError('Please provide \n`eval:\n\tckpt_load_path`\n in the yaml config file ')

    # dataset: no sharding arguments are passed (standalone evaluation)
    loader_eval = build_dataset(
        cfg.eval.dataset,
        cfg.eval.loader,
        num_shards=None,
        shard_id=None,
        is_train=False)
    num_batches = loader_eval.get_dataset_size()

    # model: build the network and load the checkpoint given in the config
    network = build_model(cfg.model, ckpt_load_path=cfg.eval.ckpt_load_path)
    network.set_train(False)

    # NOTE: only a message is printed here; this function performs no
    # precision conversion itself (see the TODO below).
    if cfg.system.amp_level != 'O0':
        print('INFO: Evaluation will run in full-precision(fp32)')

    # TODO: check float type conversion in official Model.eval
    #ms.amp.auto_mixed_precision(network, amp_level='O0')

    # postprocess network prediction, then score it with the metric
    postprocessor = build_postprocess(cfg.postprocess)
    metric = build_metric(cfg.metric)

    # The second positional argument of Evaluator is left as None.
    net_evaluator = Evaluator(network, None, postprocessor, [metric])

    # log
    print('='*40)
    print(
        f'Num batches: {num_batches}\n'
    )
    if 'name' in cfg.model:
        print(f'Model: {cfg.model.name}')
    else:
        print(f'Model: {cfg.model.backbone.name}-{cfg.model.neck.name}-{cfg.model.head.name}')
    print('='*40)

    measures = net_evaluator.eval(loader_eval)
    print('Performance: ', measures)


def parse_args():
    '''
    Parse the command-line arguments of the evaluation script.

    Returns:
        argparse.Namespace with one attribute, `config`: the path of the yaml
        config file, or an empty string when `-c/--config` is not given.
    '''
    # add_help=False: the parser does not register an automatic -h/--help option.
    parser = argparse.ArgumentParser(description='Evaluation Config', add_help=False)
    # The help text is double-quoted so the inner '' survives; with single
    # quotes the two adjacent literals silently concatenated to "(default=)".
    parser.add_argument('-c', '--config', type=str, default='',
                        help="YAML config file specifying default arguments (default='')")
    args = parser.parse_args()

    return args


if __name__ == '__main__':
    # argparser: read the path of the yaml config from the command line
    args = parse_args()
    yaml_fp = args.config
    # Read the config as UTF-8 explicitly instead of relying on the
    # platform-dependent default encoding of open().
    with open(yaml_fp, encoding='utf-8') as fp:
        config = yaml.safe_load(fp)
    # Wrap in addict.Dict so main() can use attribute access (cfg.system.mode, ...).
    config = Dict(config)

    #print(config)

    main(config)
17 changes: 6 additions & 11 deletions tools/train.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,7 @@
'''
Model training

TODO:
1. default arg values
2. top-k model saving policy
'''
import sys

sys.path.append('.')

import os
Expand All @@ -30,7 +25,7 @@
from mindocr.metrics import build_metric
from mindocr.utils.model_wrapper import NetWithLossWrapper
from mindocr.utils.train_step_wrapper import TrainOneStepWrapper
from mindocr.utils.callbacks import EvalSaveCallback # TODO: callback in a better dir
from mindocr.utils.callbacks import EvalSaveCallback
from mindocr.utils.seed import set_seed

def main(cfg):
Expand All @@ -50,7 +45,7 @@ def main(cfg):
rank_id = None

set_seed(cfg.system.seed)
cv2.setNumThreads(2) # TODO: proper value
cv2.setNumThreads(2) # TODO: by default, num threads = num cpu cores
is_main_device = rank_id in [None, 0]

# train pipeline
Expand Down Expand Up @@ -100,14 +95,14 @@ def main(cfg):
# postprocess network prediction
metric = build_metric(cfg.metric)

# build callbacks
# build callbacks
eval_cb = EvalSaveCallback(
network,
loader_eval,
postprocessor=postprocessor,
metrics=[metric],
rank_id=rank_id,
ckpt_save_dir=cfg.system.ckpt_save_dir,
ckpt_save_dir=cfg.train.ckpt_save_dir,
main_indicator=cfg.metric.main_indicator)

# log
Expand All @@ -121,14 +116,14 @@ def main(cfg):
f'Scheduler: {cfg.scheduler.scheduler}\n'
f'LR: {cfg.scheduler.lr} \n'
f'drop_overflow_update: {cfg.system.drop_overflow_update}'
)
)
if 'name' in cfg.model:
print(f'Model: {cfg.model.name}')
else:
print(f'Model: {cfg.model.backbone.name}-{cfg.model.neck.name}-{cfg.model.head.name}')
print('='*40)
# save args used for training
with open(os.path.join(cfg.system.ckpt_save_dir, 'args.yaml'), 'w') as f:
with open(os.path.join(cfg.train.ckpt_save_dir, 'args.yaml'), 'w') as f:
args_text = yaml.safe_dump(cfg.to_dict(), default_flow_style=False)
f.write(args_text)

Expand Down