
Commit 506c06a

Merge branch 'develop'
2 parents: 5476179 + 2577c79

File tree: 11 files changed (+294, -8 lines)

CHANGELOG.md

Lines changed: 9 additions & 0 deletions

@@ -1,3 +1,12 @@
+## [1.2.1] - 2024-03-12
+### Changed
+- Fixed many minor bugs
+
+### Added
+- Added `mltu.transformers.ImageNormalizer` to normalize and transpose images
+- Added `mltu.torch.yolo.annotation.VOCAnnotationReader` to read VOC annotation files
+- Added `mltu.torch.yolo.preprocessors.YoloPreprocessor` to preprocess images and annotations for YoloV8 detection model
+
 ## [1.2.0] - 2024-03-05
 ### Changed
 - Creating code to work with Ultralytics YoloV8 Detection model (training and inference)

mltu/__init__.py

Lines changed: 1 addition & 1 deletion

@@ -1,4 +1,4 @@
-__version__ = "1.2.0"
+__version__ = "1.2.1"
 
 from .annotations.images import Image
 from .annotations.images import CVImage

mltu/annotations/detections.py

Lines changed: 9 additions & 3 deletions

@@ -114,6 +114,9 @@ def validate(self):
             if self.width is None or self.height is None:
                 raise ValueError("width and height must be provided when relative is False")
 
+            if (np.array(self.bbox) > 1.0).any():
+                raise ValueError("bbox coordinates must be in range [0, 1] when relative is False")
+
             bbox = np.array(self.bbox) / np.array([self.width, self.height, self.width, self.height])
 
         else:
@@ -132,11 +135,14 @@ def validate(self):
             raise ValueError(f"bbox_type {self.bbox_type} not supported")
 
     def flip(self, direction: int):
+        new_xywh = self.xywh
         if direction == 0: # mirror
-            self._xywh[0] = 1 - self._xywh[0]
+            new_xywh[0] = 1 - new_xywh[0]
 
         elif direction == 1: # vertical
-            self._xywh[1] = 1 - self._xywh[1]
+            new_xywh[1] = 1 - new_xywh[1]
+
+        self.xywh = new_xywh
 
         self.augmented = True
 
@@ -175,7 +181,7 @@ def dot(self, rotMat: np.ndarray, width: int, height: int):
         new_w /= width
         new_h /= height
 
-        self._xywh = np.array([new_x, new_y, new_w, new_h])
+        self.xywh = np.array([new_x, new_y, new_w, new_h])
 
         self.width = width
         self.height = height
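
For context, a brief usage sketch (not part of the commit) of the reworked flip: updates now go through the public xywh property rather than the private _xywh array, presumably so the property setter can refresh any derived state. The BboxType.XYWH member and the optional constructor arguments are assumptions here, not confirmed by this diff.

# Hypothetical sketch, not from this commit: flipping a relative-coordinate detection.
from mltu.annotations.detections import Detection, BboxType

det = Detection(
    bbox=[0.25, 0.40, 0.10, 0.20],   # relative x-center, y-center, width, height (assumed XYWH layout)
    label="licence",
    bbox_type=BboxType.XYWH,         # assumed member; the diff only confirms BboxType.XYXY exists
    relative=True,
)

det.flip(0)       # 0 = mirror horizontally, 1 = flip vertically (per the diff above)
print(det.xywh)   # x-center becomes 1 - 0.25 = 0.75, the remaining values are unchanged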

mltu/augmentors.py

Lines changed: 1 addition & 2 deletions

@@ -691,12 +691,11 @@ def __call__(self, image: Image, annotation: typing.Any) -> typing.Tuple[Image,
                 xywh,
                 label=detection.label,
                 labels=detection.labels,
-                bbox_type=detection.bbox_type,
                 confidence=detection.confidence,
                 image_path=detection.image_path,
                 width=width,
                 height=height,
-                relative=detection.relative
+                relative=True
             )
             detections.append(new_detection)

mltu/dataProvider.py

Lines changed: 1 addition & 1 deletion

@@ -225,7 +225,7 @@ def __iter__(self):
 
     def process_data(self, batch_data):
        """ Process data batch of data """
-        if self._use_cache and batch_data[0] in self._cache:
+        if self._use_cache and batch_data[0] in self._cache and isinstance(batch_data[0], str):
            data, annotation = copy.deepcopy(self._cache[batch_data[0]])
        else:
            data, annotation = batch_data

mltu/torch/dataProvider.py

Lines changed: 1 addition & 0 deletions

@@ -169,6 +169,7 @@ def __init__(
             workers (int, optional): Number of workers to use for multiprocessing or multithreading. Defaults to os.cpu_count().
             use_multiprocessing (bool, optional): Whether to use multiprocessing or multithreading. Defaults to multithreading (False).
             max_queue_size (int, optional): Maximum size of the queue. Defaults to 5.
+            numpy (bool, optional): Whether to convert data to numpy. Defaults to True.
         """
         super(DataProvider, self).__init__(dataset=dataset, data_preprocessors=data_preprocessors, batch_size=batch_size,
             shuffle=shuffle, initial_epoch=initial_epoch, augmentors=augmentors, transformers=transformers, batch_postprocessors=batch_postprocessors,

mltu/torch/handlers.py

Lines changed: 2 additions & 1 deletion

@@ -39,7 +39,8 @@ def results(self, loss, train: bool=True):
            else:
                results_dict[suffix + metric.name] = result
 
-        return {k: round(v, 4) for k, v in results_dict.items() if v}
+        logs = {k: round(v, 4) for k, v in results_dict.items() if v is not None}
+        return logs
 
    def description(self, epoch: int=None, train: bool=True):
        epoch_desc = f"Epoch {epoch} - " if epoch is not None else " "
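
The behavioural difference is subtle, so a small standalone illustration (not part of the commit) may help: filtering on truthiness silently dropped metrics whose value was exactly 0, while filtering on "is not None" keeps them.

# Illustrative only: compare the old and new filtering of the results dictionary.
results_dict = {"loss": 0.0, "val_fitness": None}

old_logs = {k: round(v, 4) for k, v in results_dict.items() if v}               # {}  (the 0.0 loss is lost)
new_logs = {k: round(v, 4) for k, v in results_dict.items() if v is not None}   # {"loss": 0.0}
print(old_logs, new_logs)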

mltu/torch/yolo/annotation.py

Lines changed: 81 additions & 0 deletions (new file)

@@ -0,0 +1,81 @@
import os
import typing
from pathlib import Path
import xml.etree.ElementTree as ET
from mltu.annotations.detections import Detections, Detection, BboxType

class VOCAnnotationReader:
    """Reads annotations from VOC format
    """
    def __init__(self, labels: dict, images_path: str=None):
        self.labels = labels
        self.images_path = images_path

    @staticmethod
    def readFromVOC(voc_annotation_path: str, labels: dict, images_path: str=None) -> Detections:
        annotation_path = Path(voc_annotation_path)
        tree = ET.parse(voc_annotation_path)
        root = tree.getroot()

        annotation_dict = {}

        # Iterate through child elements
        for child in root:
            if child.tag == 'object':
                obj_dict = {}
                for obj_child in child:
                    if obj_child.tag == 'bndbox':
                        bbox_dict = {}
                        for bbox_child in obj_child:
                            bbox_dict[bbox_child.tag] = int(bbox_child.text)
                        obj_dict[obj_child.tag] = bbox_dict
                    else:
                        obj_dict[obj_child.tag] = obj_child.text
                if 'objects' not in annotation_dict:
                    annotation_dict['objects'] = []
                annotation_dict['objects'].append(obj_dict)
            elif child.tag == 'size':
                size_dict = {}
                for size_child in child:
                    size_dict[size_child.tag] = int(size_child.text)
                annotation_dict['size'] = size_dict
            else:
                annotation_dict[child.tag] = child.text

        # Get the image path if not provided
        if images_path is None:
            images_path = annotation_path.parent.parent / annotation_dict["folder"]

        image_path = os.path.join(images_path, annotation_dict['filename'])
        dets = []
        for obj in annotation_dict['objects']:
            if obj['name'] not in labels.values():
                print(f"Label {obj['name']} not found in labels")
                continue

            dets.append(Detection(
                bbox=[obj['bndbox']['xmin'], obj['bndbox']['ymin'], obj['bndbox']['xmax'], obj['bndbox']['ymax']],
                label=obj['name'],
                bbox_type=BboxType.XYXY,
                confidence=1,
                image_path=image_path,
                width=annotation_dict['size']['width'],
                height=annotation_dict['size']['height'],
                relative=False
            ))

        detections = Detections(
            labels=labels,
            width=annotation_dict['size']['width'],
            height=annotation_dict['size']['height'],
            image_path=image_path,
            detections=dets
        )

        return detections

    def __call__(self, image: typing.Any, annotation: str) -> typing.Tuple[typing.Any, Detections]:
        detections = self.readFromVOC(annotation, self.labels, self.images_path)
        if image is None:
            image = detections.image_path
        return image, detections
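
A minimal usage sketch for the new reader (the XML path and label map below are placeholders, not part of the commit):

# Hypothetical example: read one Pascal VOC XML file into a Detections object.
# The reader is designed to run as a DataProvider data preprocessor, so it is called
# with (image, annotation); passing image=None returns the image path recovered from the XML.
from mltu.torch.yolo.annotation import VOCAnnotationReader

labels = {0: "licence"}  # id -> name map, as in the training script below
reader = VOCAnnotationReader(labels=labels)

image_path, detections = reader(None, "Datasets/car-plate-detection/annotations/example.xml")
print(image_path)
for detection in detections:               # Detections is iterable over Detection objects
    print(detection.label, detection.xywh)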

mltu/torch/yolo/preprocessors.py

Lines changed: 30 additions & 0 deletions (new file)

@@ -0,0 +1,30 @@
import torch
import numpy as np

class YoloPreprocessor:
    def __init__(self, device, imgsz=640):
        self.device = device
        self.imgsz = imgsz

    def __call__(self, images, annotations):
        batch = {
            "ori_shape": [],
            "resized_shape": [],
            "cls": [],
            "bboxes": [],
            "batch_idx": [],
        }

        for i, (image, detections) in enumerate(zip(images, annotations)):
            batch["ori_shape"].append([detections.height, detections.width])
            batch["resized_shape"].append([self.imgsz, self.imgsz])
            for detection in detections:
                batch["cls"].append([detection.labelId])
                batch["bboxes"].append(detection.xywh)
                batch["batch_idx"].append(i)

        batch["cls"] = torch.tensor(batch["cls"]).to(self.device)
        batch["bboxes"] = torch.tensor(batch["bboxes"]).to(self.device)
        batch["batch_idx"] = torch.tensor(batch["batch_idx"]).to(self.device)

        return np.array(images), batch
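
To make the produced layout concrete, a short sketch (not from the commit) of how the class is instantiated and what its batch dictionary holds; the field descriptions follow directly from the code above:

# Hypothetical sketch: YoloPreprocessor is meant to run as a DataProvider batch
# postprocessor (see the training script below). It returns the images as a numpy
# array plus a dict in the layout the Ultralytics v8 detection loss consumes:
#   ori_shape     - original (height, width) per image
#   resized_shape - (imgsz, imgsz) per image
#   cls           - one class id per detection
#   bboxes        - one xywh box per detection
#   batch_idx     - index of the image each detection belongs to
import torch
from mltu.torch.yolo.preprocessors import YoloPreprocessor

yolo_preprocessor = YoloPreprocessor(device=torch.device("cpu"), imgsz=416)
# images_np, batch = yolo_preprocessor(images, annotations)  # images: list of arrays, annotations: list of Detections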

mltu/torch/yolo/train_yolo.py

Lines changed: 136 additions & 0 deletions (new file)

@@ -0,0 +1,136 @@
import os
import time
import torch
from mltu.preprocessors import ImageReader
from mltu.annotations.images import CVImage
from mltu.transformers import ImageResizer, ImageShowCV2, ImageNormalizer
from mltu.augmentors import RandomBrightness, RandomRotate, RandomErodeDilate, RandomSharpen, \
    RandomMirror, RandomFlip, RandomGaussianBlur, RandomSaltAndPepper, RandomDropBlock, RandomMosaic
from mltu.torch.model import Model
from mltu.torch.dataProvider import DataProvider
from mltu.torch.yolo.annotation import VOCAnnotationReader
from mltu.torch.yolo.preprocessors import YoloPreprocessor
from mltu.torch.yolo.loss import v8DetectionLoss
from mltu.torch.yolo.metrics import YoloMetrics
from mltu.torch.yolo.optimizer import build_optimizer, AccumulativeOptimizer
from mltu.torch.callbacks import EarlyStopping, ModelCheckpoint, ReduceLROnPlateau, TensorBoard, Model2onnx, WarmupCosineDecay

from ultralytics.nn.tasks import DetectionModel
from ultralytics.engine.model import Model as BaseModel


annotations_path = "Datasets/car-plate-detection/annotations"

dataset = [[None, os.path.join(annotations_path, f)] for f in os.listdir(annotations_path)]

# Make sure torch can see GPU device, it is not recommended to train with CPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

img_size = 416
labels = {0: "licence"}

# Create a data provider for the dataset
data_provider = DataProvider(
    dataset=dataset,
    skip_validation=True,
    batch_size=16,
    data_preprocessors=[
        VOCAnnotationReader(labels=labels),
        ImageReader(CVImage),
        ],
    transformers=[
        # ImageShowCV2(),
        ImageResizer(img_size, img_size),
        ImageNormalizer(transpose_axis=True),
        ],
    batch_postprocessors=[
        YoloPreprocessor(device, img_size)
        ],
    numpy=False,
)

# for b in data_provider:
#     pass

# split the dataset into train and test
train_data_provider, val_data_provider = data_provider.split(0.9, shuffle=False)

# Attaach augmentation to the train data provider
train_data_provider.augmentors = [
    RandomBrightness(),
    RandomErodeDilate(),
    RandomSharpen(),
    RandomMirror(),
    RandomFlip(),
    RandomGaussianBlur(),
    RandomSaltAndPepper(),
    RandomRotate(angle=10),
    RandomDropBlock(),
    RandomMosaic(),
]

# for batch in train_data_provider:
#     pass
#     print(batch)
#     break



base_model = BaseModel("yolov8n.pt")
# Create a YOLO model
model = DetectionModel('yolov8n.yaml', nc=len(labels))

try: model.load_state_dict(base_model.model.state_dict(), strict=False)
except: pass

model.to(device)

for k, v in model.named_parameters():
    if any(x in k for x in [".dfl"]):
        print("freezing", k)
        v.requires_grad = False
    elif not v.requires_grad:
        v.requires_grad = True

lr = 1e-3
optimizer = build_optimizer(model.model, name="AdamW", lr=lr, weight_decay=0.0, momentum=0.937, decay=0.0005)
optimizer = AccumulativeOptimizer(optimizer, 16, 64)

# create model object that will handle training and testing of the network
model = Model(
    model,
    optimizer,
    v8DetectionLoss(model),
    metrics=[YoloMetrics(nc=len(labels))],
    log_errors=False,
    output_path=f"Models/detector/{int(time.time())}",
    clip_grad_norm=10.0,
    ema=True,
)

modelCheckpoint = ModelCheckpoint(monitor="val_fitness", mode="max", save_best_only=True, verbose=True)
tensorBoard = TensorBoard()
earlyStopping = EarlyStopping(monitor="val_fitness", mode="max", patience=31, verbose=True)
model2onnx = Model2onnx(input_shape=(1, 3, img_size, img_size), verbose=True, opset_version=14,
                        dynamic_axes = {"input": {0: "batch_size", 2: "height", 3: "width"},
                                        "output": {0: "batch_size", 2: "anchors"}},
                        metadata={"classes": labels})
warmupCosineDecayBias = WarmupCosineDecay(lr_after_warmup=lr, final_lr=lr, initial_lr=0.1,
                                          warmup_steps=len(train_data_provider), warmup_epochs=10, ignore_param_groups=[1, 2]) # lr0
warmupCosineDecay = WarmupCosineDecay(lr_after_warmup=lr, final_lr=lr/10, initial_lr=1e-7,
                                      warmup_steps=len(train_data_provider), warmup_epochs=10, decay_epochs=190, ignore_param_groups=[0]) # lr1 and lr2

# Train the model
history = model.fit(
    train_data_provider,
    test_dataProvider=val_data_provider,
    epochs=200,
    callbacks=[
        modelCheckpoint,
        tensorBoard,
        earlyStopping,
        model2onnx,
        warmupCosineDecayBias,
        warmupCosineDecay
    ]
)
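
Since VOCAnnotationReader is constructed without images_path, the script relies on the reader resolving image locations from the <folder> tag inside each XML, which implies a layout roughly like the sketch below (illustrative only; the actual folder name depends on the dataset):

Datasets/car-plate-detection/
    annotations/          one Pascal VOC .xml file per image
    <folder from XML>/    the image files referenced by each annotation's <filename> tag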

mltu/transformers.py

Lines changed: 23 additions & 0 deletions

@@ -13,6 +13,7 @@
 - ImageResizer - Resize image to (width, height)
 - LabelIndexer - Convert label to index by vocab
 - LabelPadding - Pad label to max_word_length
+- ImageNormalizer - Normalize image to float value, transpose axis if necessary and convert to numpy
 - SpectrogramPadding - Pad spectrogram to max_spectrogram_length
 - AudioToSpectrogram - Convert Audio to Spectrogram
 - ImageShowCV2 - Show image for visual inspection
@@ -171,6 +172,28 @@ def __call__(self, data: np.ndarray, label: np.ndarray):
         return data, np.pad(label, (0, self.max_word_length - len(label)), "constant", constant_values=self.padding_value)
 
 
+class ImageNormalizer:
+    """ Normalize image to float value, transpose axis if necessary and convert to numpy
+    """
+    def __init__(self, transpose_axis: bool=False):
+        """ Initialize ImageNormalizer
+
+        Args:
+            transpose_axis (bool): Whether to transpose axis. Default: False
+        """
+        self.transpose_axis = transpose_axis
+
+    def __call__(self, image: Image, annotation: typing.Any) -> typing.Tuple[np.ndarray, typing.Any]:
+        """ Convert each Image to numpy, transpose axis ant normalize to float value
+        """
+        img = image.numpy() / 255.0
+
+        if self.transpose_axis:
+            img = img.transpose(2, 0, 1)
+
+        return img, annotation
+
+
 class SpectrogramPadding(Transformer):
     """Pad spectrogram to max_spectrogram_length
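
A short usage sketch (not part of the commit) showing the new transformer in a typical pipeline, mirroring the training script above:

# Resize first, then let ImageNormalizer scale pixels to [0, 1] floats and move the
# channel axis first (HWC -> CHW) for PyTorch models.
from mltu.transformers import ImageResizer, ImageNormalizer

transformers = [
    ImageResizer(416, 416),
    ImageNormalizer(transpose_axis=True),
]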
