modify print to logger #429

Merged
merged 3 commits on Jun 20, 2023
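Note: the `Logger` imported from `mindocr/utils/logger.py` throughout this PR is not shown in the diff. As orientation for reviewers, here is a minimal sketch of what such a helper might look like, assuming it is a thin wrapper over Python's standard `logging` module; the body below is illustrative, not the actual mindocr implementation.

```python
# Illustrative sketch only, NOT the actual mindocr/utils/logger.py.
# Assumes Logger is a thin factory over the standard logging module.
import logging
import sys


def Logger(name: str) -> logging.Logger:
    """Return a named logger with a single console handler attached."""
    logger = logging.getLogger(name)
    if not logger.handlers:  # guard against adding duplicate handlers on re-import
        handler = logging.StreamHandler(sys.stdout)
        handler.setFormatter(logging.Formatter("[%(levelname)s] %(name)s: %(message)s"))
        logger.addHandler(handler)
        logger.setLevel(logging.INFO)
    return logger
```

Because every touched module calls `Logger("mindocr")` with the same name, `logging.getLogger` would return the same underlying logger each time, so levels and handlers are configured once and shared across the data pipeline.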
27 changes: 14 additions & 13 deletions mindocr/data/builder.py
@@ -3,12 +3,14 @@

 import mindspore as ms

+from ..utils.logger import Logger
 from .det_dataset import DetDataset, SynthTextDataset
 from .predict_dataset import PredictDataset
 from .rec_dataset import RecDataset
 from .rec_lmdb_dataset import LMDBDataset

 __all__ = ["build_dataset"]
+_logger = Logger("mindocr")

 supported_dataset_types = [
     "BaseDataset",
@@ -111,8 +113,8 @@ def build_dataset(
     ) # optimal num workers assuming all cpu cores are used in this job
     num_workers = loader_config.get("num_workers", NUM_WORKERS_MAP)
     if num_workers > int(cores / num_devices):
-        print(
-            f"WARNING: `num_workers` is adjusted to {int(cores / num_devices)} since {num_workers}x{num_devices} "
+        _logger.warning(
+            f"`num_workers` is adjusted to {int(cores / num_devices)} since {num_workers}x{num_devices} "
             f"exceeds the number of CPU cores {cores}"
         )
         num_workers = int(cores / num_devices)
@@ -135,7 +137,6 @@
     dataset = dataset_class(**dataset_args)

     dataset_column_names = dataset.get_output_columns()
-    # print('=> Dataset output columns: \n\t', dataset_column_names)

     # Generate source dataset (source w.r.t. the dataset.map pipeline)
     # based on python callable numpy dataset in parallel
@@ -161,25 +162,25 @@

     device_id = 0 if shard_id is None else shard_id
     is_main_device = device_id == 0
-    print(
-        f"INFO: Creating dataloader (training={is_train}) for device {device_id}. Number of data samples: {num_samples}"
+    _logger.info(
+        f"Creating dataloader (training={is_train}) for device {device_id}. Number of data samples: {num_samples}"
     )

     if "refine_batch_size" in kwargs:
         batch_size = _check_batch_size(num_samples, batch_size, refine=kwargs["refine_batch_size"])

     drop_remainder = loader_config.get("drop_remainder", is_train)
     if is_train and drop_remainder is False and is_main_device:
-        print(
-            "WARNING: `drop_remainder` should be True for training, otherwise the last batch may lead to training failure "
-            "in Graph mode"
+        _logger.warning(
+            "`drop_remainder` should be True for training, "
+            "otherwise the last batch may lead to training failure in Graph mode"
         )

     if not is_train:
         if drop_remainder and is_main_device:
-            print(
-                "WARNING: `drop_remainder` is forced to be False for evaluation to include the last batch for "
-                "accurate evaluation."
+            _logger.warning(
+                "`drop_remainder` is forced to be False for evaluation "
+                "to include the last batch for accurate evaluation."
             )
             drop_remainder = False
@@ -228,8 +229,8 @@ def _check_batch_size(num_samples, ori_batch_size=32, refine=True):
         # search a batch size by which num_samples is divisible.
         for bs in range(ori_batch_size - 1, 0, -1):
             if num_samples % bs == 0:
-                print(
-                    f"INFO: Batch size for evaluation is refined to {bs} to ensure the last batch will not be "
+                _logger.info(
+                    f"Batch size for evaluation is refined to {bs} to ensure the last batch will not be "
                     f"dropped/padded in graph mode."
                 )
                 return bs
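The `_check_batch_size` hunk is truncated above, so here is the refinement idea in isolation: search downward from the requested batch size for the largest value that divides the sample count evenly, so the last evaluation batch is never dropped or padded in graph mode. A self-contained sketch under that reading, not the full mindocr function (which also honors a `refine` flag):

```python
def refine_batch_size(num_samples: int, ori_batch_size: int = 32) -> int:
    """Largest batch size <= ori_batch_size that evenly divides num_samples."""
    for bs in range(ori_batch_size, 0, -1):
        if num_samples % bs == 0:
            return bs  # no remainder, so the last batch is always full-sized
    return 1  # unreachable for num_samples >= 1; kept for defensive clarity


# e.g. 100 samples with a requested batch size of 32 is refined to 25
assert refine_batch_size(100, 32) == 25
```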
6 changes: 4 additions & 2 deletions mindocr/data/det_dataset.py
@@ -5,10 +5,12 @@
 import numpy as np
 from scipy.io import loadmat

+from ..utils.logger import Logger
 from .base_dataset import BaseDataset
 from .transforms.transforms_factory import create_transforms, run_transforms

 __all__ = ["DetDataset", "SynthTextDataset"]
+_logger = Logger("mindocr")


 class DetDataset(BaseDataset):
@@ -104,7 +106,7 @@ def __getitem__(self, index):
             data = run_transforms(data, transforms=self.transforms)
             output_tuple = tuple(data[k] for k in self.output_columns)
         except Exception as e:
-            print(f"Error occurred while processing the image: {self.data_list[index]['img_path']}\n", e, flush=True)
+            _logger.warning(f"Error occurred while processing the image: {self.data_list[index]['img_path']}\n {e}")
             return self[random.randrange(len(self.data_list))]  # return another random sample instead

         return output_tuple
@@ -160,7 +162,7 @@ def _parse_annotation(self, data_line: str):

 class SynthTextDataset(DetDataset):
     def load_data_list(self, label_file: List[str], *args):
-        print("Loading SynthText dataset. It might take a while...")
+        _logger.info("Loading SynthText dataset. It might take a while...")
         mat = loadmat(label_file[0])

         data_list = []
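A behavioral note on the `__getitem__` hunk above: a failing sample is now logged as a warning and replaced with a random other sample, so one corrupt image or annotation no longer crashes the whole training run. The same pattern in miniature, as a sketch rather than the actual DetDataset code (note it would recurse indefinitely if every sample were broken):

```python
import logging
import random

_logger = logging.getLogger("mindocr")


class ResampleOnError:
    """Toy dataset that swaps a failing sample for a random healthy one."""

    def __init__(self, samples, transform):
        self.samples = samples
        self.transform = transform

    def __len__(self):
        return len(self.samples)

    def __getitem__(self, index):
        try:
            return self.transform(self.samples[index])
        except Exception as e:
            _logger.warning(f"Error occurred while processing sample {index}: {e}")
            return self[random.randrange(len(self.samples))]  # retry with another sample
```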
8 changes: 5 additions & 3 deletions mindocr/data/rec_lmdb_dataset.py
@@ -4,10 +4,12 @@
 import lmdb
 import numpy as np

+from ..utils.logger import Logger
 from .base_dataset import BaseDataset
 from .transforms.transforms_factory import create_transforms, run_transforms

 __all__ = ["LMDBDataset"]
+_logger = Logger("mindocr")


 class LMDBDataset(BaseDataset):
@@ -104,13 +106,13 @@ def prefetch(self, output_columns):
         )

     def filter_idx_list(self, idx_list: np.ndarray) -> np.ndarray:
-        print("Start filtering the idx list...")
+        _logger.info("Start filtering the idx list...")
         new_idx_list = list()
         for lmdb_idx, file_idx in idx_list:
             label = self.get_lmdb_sample_info(self.lmdb_sets[int(lmdb_idx)]["txn"], int(file_idx), label_only=True)
             if len(label) > self.max_text_len:
-                print(
-                    f"WARNING: skip the label with length ({len(label)}), "
+                _logger.warning(
+                    f"skip the label with length ({len(label)}), "
                     f"which is longer than the max length ({self.max_text_len})."
                 )
                 continue
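For readers skimming `filter_idx_list`: records whose label exceeds `max_text_len` are skipped with a warning instead of failing later in the label encoder. Stripped of the LMDB plumbing, the filter reduces to something like the following sketch (illustrative, not the LMDBDataset method):

```python
import logging

_logger = logging.getLogger("mindocr")


def filter_by_label_length(labels, max_text_len):
    """Return indices of labels that fit within max_text_len."""
    kept = []
    for idx, label in enumerate(labels):
        if len(label) > max_text_len:
            _logger.warning(
                f"skip the label with length ({len(label)}), "
                f"which is longer than the max length ({max_text_len})."
            )
            continue
        kept.append(idx)
    return kept
```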
17 changes: 10 additions & 7 deletions mindocr/data/transforms/det_transforms.py
@@ -12,6 +12,8 @@
 import pyclipper
 from shapely.geometry import Polygon, box

+from mindocr.utils.logger import Logger
+
 __all__ = [
     "DetLabelEncode",
     "BorderMap",
@@ -25,6 +27,7 @@
     "GridResize",
     "ScalePadImage",
 ]
+_logger = Logger("mindocr")


 class DetLabelEncode:
@@ -384,8 +387,8 @@ def __init__(
         if limit_type in ["min", "max"]:
             keep_ratio = True
             padding = False
-            print(
-                f"INFO: `limit_type` is {limit_type}. Image will be resized by limiting the {limit_type} "
+            _logger.info(
+                f"`limit_type` is {limit_type}. Image will be resized by limiting the {limit_type} "
                 f"side length to {limit_side_len}."
            )
         elif not limit_type:
@@ -396,12 +399,12 @@
             if target_size and force_divisable:
                 if (target_size[0] % divisor != 0) or (target_size[1] % divisor != 0):
                     self.target_size = [max(round(x / self.divisor) * self.divisor, self.divisor) for x in target_size]
-                    print(
-                        f"WARNING: `force_divisable` is enabled but the set target size {target_size} "
+                    _logger.warning(
+                        f"`force_divisable` is enabled but the set target size {target_size} "
                         f"is not divisible by {divisor}. Target size is adjusted to {self.target_size}"
                     )
             if (target_size is not None) and keep_ratio and (not padding):
-                print("WARNING: output shape can be dynamic if keep_ratio but no padding.")
+                _logger.warning("output shape can be dynamic if keep_ratio but no padding.")
         else:
             raise ValueError(f"Unknown limit_type: {limit_type}")
@@ -468,8 +471,8 @@ def __call__(self, data: dict):
             padded_img[:resize_h, :resize_w, :] = resized_img
             data["image"] = padded_img
         else:
-            print(
-                f"WARNING: Image shape after resize is ({resize_h}, {resize_w}), "
+            _logger.warning(
+                f"Image shape after resize is ({resize_h}, {resize_w}), "
                 f"which is larger than target_size {self.target_size}. Skip padding for the current image. "
                 f"You may disable `force_divisable` to avoid this warning."
             )
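Context for the resize hunks above: with a `limit_type` of "min" or "max", the image is scaled so that the constrained side meets `limit_side_len`, and the result is typically rounded to a multiple of `divisor` so the network receives well-shaped inputs. A sketch of that arithmetic under those assumptions; parameter names mirror the diff, but this is not mindocr's actual resize transform:

```python
def limited_resize_shape(h, w, limit_side_len=960, limit_type="max", divisor=32):
    """Compute a target (h, w) whose limited side respects limit_side_len."""
    if limit_type == "max":
        # shrink only when the longer side exceeds the limit
        ratio = limit_side_len / max(h, w) if max(h, w) > limit_side_len else 1.0
    elif limit_type == "min":
        # enlarge only when the shorter side falls below the limit
        ratio = limit_side_len / min(h, w) if min(h, w) < limit_side_len else 1.0
    else:
        raise ValueError(f"Unknown limit_type: {limit_type}")
    resize_h = max(int(round(h * ratio / divisor)) * divisor, divisor)
    resize_w = max(int(round(w * ratio / divisor)) * divisor, divisor)
    return resize_h, resize_w


# a 720x1280 image limited to a max side of 960 resizes to (544, 960)
assert limited_resize_shape(720, 1280) == (544, 960)
```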
24 changes: 11 additions & 13 deletions mindocr/data/transforms/rec_transforms.py
@@ -7,6 +7,8 @@
 import cv2
 import numpy as np

+from mindocr.utils.logger import Logger
+
 __all__ = [
     "RecCTCLabelEncode",
     "RecAttnLabelEncode",
@@ -16,6 +18,7 @@
     "Rotate90IfVertical",
     "ClsLabelEncode",
 ]
+_logger = Logger("mindocr")


 class RecCTCLabelEncode(object):
@@ -64,7 +67,6 @@ def __init__(
             char_list = [c for c in "0123456789abcdefghijklmnopqrstuvwxyz"]

             self.lower = True
-            # print("INFO: The character_dict_path is None, model can only recognize number and lower letters")
         else:
             # TODO: this is commonly used in other modules, wrap into a func or class.
             # parse char dictionary
@@ -80,10 +82,9 @@
                 self.space_idx = len(char_list) - 1
             else:
                 if " " in char_list:
-                    print(
-                        "WARNING: The dict still contains the space char although use_space_char is set to False, "
-                        "because the space char is coded in the dictionary file ",
-                        character_dict_path,
+                    _logger.warning(
+                        "The dict still contains the space char although use_space_char is set to False, "
+                        f"because the space char is coded in the dictionary file {character_dict_path}"
                     )

         self.num_valid_chars = len(char_list)  # the number of valid chars (including space char if used)
@@ -168,7 +169,7 @@ def __init__(
             char_list = list("0123456789abcdefghijklmnopqrstuvwxyz")

             self.lower = True
-            print("INFO: The character_dict_path is None, model can only recognize numbers and lowercase letters")
+            _logger.info("The character_dict_path is None, model can only recognize numbers and lowercase letters")
         else:
             # parse char dictionary
             char_list = []
@@ -184,10 +185,9 @@
                 self.space_idx = len(char_list) + 1
             else:
                 if " " in char_list:
-                    print(
-                        "WARNING: The dict still contains the space char although use_space_char is set to False, "
-                        "because the space char is coded in the dictionary file ",
-                        character_dict_path,
+                    _logger.warning(
+                        "The dict still contains the space char although use_space_char is set to False, "
+                        f"because the space char is coded in the dictionary file {character_dict_path}"
                     )

         self.num_valid_chars = len(char_list)  # the number of valid chars (including space char if used)
@@ -236,11 +236,10 @@ def str2idx(text: str, label_dict: Dict[str, int], max_text_len: int = 23, lower
     # TODO: for char not in the dictionary, skipping may lead to None data. Use a char replacement? refer to mmocr
     for char in text:
         if char not in label_dict:
-            # print('WARNING: {} is not in dict'.format(char))
             continue
         char_indices.append(label_dict[char])
     if len(char_indices) == 0:
-        print("WARNING: {} does not contain any valid char in the dict".format(text))
+        _logger.warning("{} does not contain any valid char in the dict".format(text))
         return None

     return char_indices
@@ -420,7 +419,6 @@ def __call__(self, data):
         else:
             src_wh_ratio = w / float(h)
             resize_w = math.ceil(min(src_wh_ratio, max_wh_ratio) * resize_h)
-            # print('Rec resize: ', h, w, "->", resize_h, resize_w)
         resized_img = cv2.resize(img, (resize_w, resize_h), interpolation=self.interpolation)

         # TODO: norm before padding
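Since `str2idx` appears only partially above, here is a small worked example of the behavior the new warning covers: characters missing from the dictionary are skipped, and a string with no valid characters encodes to `None`. This sketch is consistent with the hunk but is not the complete mindocr helper (which also handles `max_text_len` and lowercasing):

```python
import logging

_logger = logging.getLogger("mindocr")


def str2idx(text, label_dict):
    """Map characters to dictionary indices, skipping unknown characters."""
    char_indices = [label_dict[c] for c in text if c in label_dict]
    if not char_indices:
        _logger.warning("{} does not contain any valid char in the dict".format(text))
        return None
    return char_indices


label_dict = {c: i for i, c in enumerate("0123456789abcdefghijklmnopqrstuvwxyz")}
print(str2idx("ab#1", label_dict))  # [10, 11, 1], the '#' is silently skipped
print(str2idx("###", label_dict))   # None, with a warning logged
```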
15 changes: 11 additions & 4 deletions mindocr/data/transforms/transforms_factory.py
@@ -5,13 +5,16 @@

 import numpy as np

+from mindocr.utils.logger import Logger
+
 from .det_east_transforms import *
 from .det_transforms import *
 from .general_transforms import *
 from .rec_transforms import *
 from .svtr_transform import *

 __all__ = ["create_transforms", "run_transforms", "transforms_dbnet_icdar15"]
+_logger = Logger("mindocr")


 # TODO: use class with __call__, to perform transformation
@@ -48,7 +51,7 @@ def create_transforms(transform_pipeline: List, global_config: Dict = None):
             transforms.append(transform_config)
         else:
             raise TypeError("transform_config must be a dict or a callable instance")
-    # print(global_config)
+
     return transforms
@@ -57,11 +60,15 @@ def run_transforms(data, transforms=None, verbose=False):
         transforms = []
     for i, transform in enumerate(transforms):
         if verbose:
-            print(f"Trans {i}: ", transform)
-            print("\tInput: ", {k: data[k].shape for k in data if isinstance(data[k], np.ndarray)})
+            _logger.info(f"Trans {i}: {transform}")
+            _logger.info(
+                "\tInput: " + "\t".join([f"{k}: {data[k].shape}" for k in data if isinstance(data[k], np.ndarray)])
+            )
         data = transform(data)
         if verbose:
-            print("\tOutput: ", {k: data[k].shape for k in data if isinstance(data[k], np.ndarray)})
+            _logger.info(
+                "\tOutput: " + "\t".join([f"{k}: {data[k].shape}" for k in data if isinstance(data[k], np.ndarray)])
+            )

         if data is None:
             raise RuntimeError(f"Empty result is returned from transform `{transform}`")
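Finally, a usage sketch of the factory functions touched above: a pipeline is declared as a list of single-key config dicts, built once with `create_transforms`, and applied per sample with `run_transforms`; with `verbose=True`, the per-step shapes now go through `_logger.info` instead of `print`. The pipeline config below is hypothetical; the transform names and arguments are placeholders, not a tested mindocr configuration:

```python
# Hypothetical usage sketch; the transform names and args are placeholders.
from mindocr.data.transforms.transforms_factory import create_transforms, run_transforms

pipeline_cfg = [
    {"DecodeImage": {"img_mode": "RGB", "to_float32": False}},
    {"DetLabelEncode": None},
]

transforms = create_transforms(pipeline_cfg)
data = {"img_path": "path/to/image.jpg", "label": "..."}
data = run_transforms(data, transforms=transforms, verbose=True)  # shapes logged per step
```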