
Commit 506c06a

Merge branch 'develop'
2 parents: 5476179 + 2577c79

File tree: 11 files changed (+294, -8 lines)

CHANGELOG.md

Lines changed: 9 additions & 0 deletions

@@ -1,3 +1,12 @@
+## [1.2.1] - 2024-03-12
+### Changed
+- Fixed many minor bugs
+
+### Added
+- Added `mltu.transformers.ImageNormalizer` to normalize and transpose images
+- Added `mltu.torch.yolo.annotation.VOCAnnotationReader` to read VOC annotation files
+- Added `mltu.torch.yolo.preprocessors.YoloPreprocessor` to preprocess images and annotations for YoloV8 detection model
+
 ## [1.2.0] - 2024-03-05
 ### Changed
 - Creating code to work with Ultralytics YoloV8 Detection model (training and inference)

mltu/__init__.py

Lines changed: 1 addition & 1 deletion

@@ -1,4 +1,4 @@
-__version__ = "1.2.0"
+__version__ = "1.2.1"
 
 from .annotations.images import Image
 from .annotations.images import CVImage

mltu/annotations/detections.py

Lines changed: 9 additions & 3 deletions

@@ -114,6 +114,9 @@ def validate(self):
             if self.width is None or self.height is None:
                 raise ValueError("width and height must be provided when relative is False")
 
+            if (np.array(self.bbox) > 1.0).any():
+                raise ValueError("bbox coordinates must be in range [0, 1] when relative is False")
+
             bbox = np.array(self.bbox) / np.array([self.width, self.height, self.width, self.height])
 
         else:
@@ -132,11 +135,14 @@ def validate(self):
             raise ValueError(f"bbox_type {self.bbox_type} not supported")
 
     def flip(self, direction: int):
+        new_xywh = self.xywh
         if direction == 0: # mirror
-            self._xywh[0] = 1 - self._xywh[0]
+            new_xywh[0] = 1 - new_xywh[0]
 
         elif direction == 1: # vertical
-            self._xywh[1] = 1 - self._xywh[1]
+            new_xywh[1] = 1 - new_xywh[1]
+
+        self.xywh = new_xywh
 
         self.augmented = True
 
@@ -175,7 +181,7 @@ def dot(self, rotMat: np.ndarray, width: int, height: int):
         new_w /= width
         new_h /= height
 
-        self._xywh = np.array([new_x, new_y, new_w, new_h])
+        self.xywh = np.array([new_x, new_y, new_w, new_h])
 
         self.width = width
         self.height = height
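
For context, a brief usage sketch (not part of the commit) of the reworked flip: updates now go through the public xywh property rather than the private _xywh array, presumably so the property setter can refresh any derived state. The BboxType.XYWH member and the optional constructor arguments are assumptions here, not confirmed by this diff.

# Hypothetical sketch, not from this commit: flipping a relative-coordinate detection.
from mltu.annotations.detections import Detection, BboxType

det = Detection(
    bbox=[0.25, 0.40, 0.10, 0.20],   # relative x-center, y-center, width, height (assumed XYWH layout)
    label="licence",
    bbox_type=BboxType.XYWH,         # assumed member; the diff only confirms BboxType.XYXY exists
    relative=True,
)

det.flip(0)       # 0 = mirror horizontally, 1 = flip vertically (per the diff above)
print(det.xywh)   # x-center becomes 1 - 0.25 = 0.75, the remaining values are unchanged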

mltu/augmentors.py

Lines changed: 1 addition & 2 deletions

@@ -691,12 +691,11 @@ def __call__(self, image: Image, annotation: typing.Any) -> typing.Tuple[Image,
                 xywh,
                 label=detection.label,
                 labels=detection.labels,
-                bbox_type=detection.bbox_type,
                 confidence=detection.confidence,
                 image_path=detection.image_path,
                 width=width,
                 height=height,
-                relative=detection.relative
+                relative=True
             )
             detections.append(new_detection)

mltu/dataProvider.py

Lines changed: 1 addition & 1 deletion

@@ -225,7 +225,7 @@ def __iter__(self):
 
     def process_data(self, batch_data):
        """ Process data batch of data """
-        if self._use_cache and batch_data[0] in self._cache:
+        if self._use_cache and batch_data[0] in self._cache and isinstance(batch_data[0], str):
            data, annotation = copy.deepcopy(self._cache[batch_data[0]])
        else:
            data, annotation = batch_data

mltu/torch/dataProvider.py

Lines changed: 1 addition & 0 deletions

@@ -169,6 +169,7 @@ def __init__(
             workers (int, optional): Number of workers to use for multiprocessing or multithreading. Defaults to os.cpu_count().
             use_multiprocessing (bool, optional): Whether to use multiprocessing or multithreading. Defaults to multithreading (False).
             max_queue_size (int, optional): Maximum size of the queue. Defaults to 5.
+            numpy (bool, optional): Whether to convert data to numpy. Defaults to True.
         """
         super(DataProvider, self).__init__(dataset=dataset, data_preprocessors=data_preprocessors, batch_size=batch_size,
             shuffle=shuffle, initial_epoch=initial_epoch, augmentors=augmentors, transformers=transformers, batch_postprocessors=batch_postprocessors,

mltu/torch/handlers.py

Lines changed: 2 additions & 1 deletion

@@ -39,7 +39,8 @@ def results(self, loss, train: bool=True):
            else:
                results_dict[suffix + metric.name] = result
 
-        return {k: round(v, 4) for k, v in results_dict.items() if v}
+        logs = {k: round(v, 4) for k, v in results_dict.items() if v is not None}
+        return logs
 
    def description(self, epoch: int=None, train: bool=True):
        epoch_desc = f"Epoch {epoch} - " if epoch is not None else " "
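
The behavioural difference is subtle, so a small standalone illustration (not part of the commit) may help: filtering on truthiness silently dropped metrics whose value was exactly 0, while filtering on "is not None" keeps them.

# Illustrative only: compare the old and new filtering of the results dictionary.
results_dict = {"loss": 0.0, "val_fitness": None}

old_logs = {k: round(v, 4) for k, v in results_dict.items() if v}               # {}  (the 0.0 loss is lost)
new_logs = {k: round(v, 4) for k, v in results_dict.items() if v is not None}   # {"loss": 0.0}
print(old_logs, new_logs)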

mltu/torch/yolo/annotation.py

Lines changed: 81 additions & 0 deletions (new file)

@@ -0,0 +1,81 @@
import os
import typing
from pathlib import Path
import xml.etree.ElementTree as ET
from mltu.annotations.detections import Detections, Detection, BboxType

class VOCAnnotationReader:
    """Reads annotations from VOC format
    """
    def __init__(self, labels: dict, images_path: str=None):
        self.labels = labels
        self.images_path = images_path

    @staticmethod
    def readFromVOC(voc_annotation_path: str, labels: dict, images_path: str=None) -> Detections:
        annotation_path = Path(voc_annotation_path)
        tree = ET.parse(voc_annotation_path)
        root = tree.getroot()

        annotation_dict = {}

        # Iterate through child elements
        for child in root:
            if child.tag == 'object':
                obj_dict = {}
                for obj_child in child:
                    if obj_child.tag == 'bndbox':
                        bbox_dict = {}
                        for bbox_child in obj_child:
                            bbox_dict[bbox_child.tag] = int(bbox_child.text)
                        obj_dict[obj_child.tag] = bbox_dict
                    else:
                        obj_dict[obj_child.tag] = obj_child.text
                if 'objects' not in annotation_dict:
                    annotation_dict['objects'] = []
                annotation_dict['objects'].append(obj_dict)
            elif child.tag == 'size':
                size_dict = {}
                for size_child in child:
                    size_dict[size_child.tag] = int(size_child.text)
                annotation_dict['size'] = size_dict
            else:
                annotation_dict[child.tag] = child.text

        # Get the image path if not provided
        if images_path is None:
            images_path = annotation_path.parent.parent / annotation_dict["folder"]

        image_path = os.path.join(images_path, annotation_dict['filename'])
        dets = []
        for obj in annotation_dict['objects']:
            if obj['name'] not in labels.values():
                print(f"Label {obj['name']} not found in labels")
                continue

            dets.append(Detection(
                bbox=[obj['bndbox']['xmin'], obj['bndbox']['ymin'], obj['bndbox']['xmax'], obj['bndbox']['ymax']],
                label=obj['name'],
                bbox_type=BboxType.XYXY,
                confidence=1,
                image_path=image_path,
                width=annotation_dict['size']['width'],
                height=annotation_dict['size']['height'],
                relative=False
            ))

        detections = Detections(
            labels=labels,
            width=annotation_dict['size']['width'],
            height=annotation_dict['size']['height'],
            image_path=image_path,
            detections=dets
        )

        return detections

    def __call__(self, image: typing.Any, annotation: str) -> typing.Tuple[typing.Any, Detections]:
        detections = self.readFromVOC(annotation, self.labels, self.images_path)
        if image is None:
            image = detections.image_path
        return image, detections
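
A minimal usage sketch for the new reader (the XML path and label map below are placeholders, not part of the commit):

# Hypothetical example: read one Pascal VOC XML file into a Detections object.
# The reader is designed to run as a DataProvider data preprocessor, so it is called
# with (image, annotation); passing image=None returns the image path recovered from the XML.
from mltu.torch.yolo.annotation import VOCAnnotationReader

labels = {0: "licence"}  # id -> name map, as in the training script below
reader = VOCAnnotationReader(labels=labels)

image_path, detections = reader(None, "Datasets/car-plate-detection/annotations/example.xml")
print(image_path)
for detection in detections:               # Detections is iterable over Detection objects
    print(detection.label, detection.xywh)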

mltu/torch/yolo/preprocessors.py

Lines changed: 30 additions & 0 deletions (new file)

@@ -0,0 +1,30 @@
import torch
import numpy as np

class YoloPreprocessor:
    def __init__(self, device, imgsz=640):
        self.device = device
        self.imgsz = imgsz

    def __call__(self, images, annotations):
        batch = {
            "ori_shape": [],
            "resized_shape": [],
            "cls": [],
            "bboxes": [],
            "batch_idx": [],
        }

        for i, (image, detections) in enumerate(zip(images, annotations)):
            batch["ori_shape"].append([detections.height, detections.width])
            batch["resized_shape"].append([self.imgsz, self.imgsz])
            for detection in detections:
                batch["cls"].append([detection.labelId])
                batch["bboxes"].append(detection.xywh)
                batch["batch_idx"].append(i)

        batch["cls"] = torch.tensor(batch["cls"]).to(self.device)
        batch["bboxes"] = torch.tensor(batch["bboxes"]).to(self.device)
        batch["batch_idx"] = torch.tensor(batch["batch_idx"]).to(self.device)

        return np.array(images), batch
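
To make the produced layout concrete, a short sketch (not from the commit) of how the class is instantiated and what its batch dictionary holds; the field descriptions follow directly from the code above:

# Hypothetical sketch: YoloPreprocessor is meant to run as a DataProvider batch
# postprocessor (see the training script below). It returns the images as a numpy
# array plus a dict in the layout the Ultralytics v8 detection loss consumes:
#   ori_shape     - original (height, width) per image
#   resized_shape - (imgsz, imgsz) per image
#   cls           - one class id per detection
#   bboxes        - one xywh box per detection
#   batch_idx     - index of the image each detection belongs to
import torch
from mltu.torch.yolo.preprocessors import YoloPreprocessor

yolo_preprocessor = YoloPreprocessor(device=torch.device("cpu"), imgsz=416)
# images_np, batch = yolo_preprocessor(images, annotations)  # images: list of arrays, annotations: list of Detections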

mltu/torch/yolo/train_yolo.py

Lines changed: 136 additions & 0 deletions (new file)

@@ -0,0 +1,136 @@
import os
import time
import torch
from mltu.preprocessors import ImageReader
from mltu.annotations.images import CVImage
from mltu.transformers import ImageResizer, ImageShowCV2, ImageNormalizer
from mltu.augmentors import RandomBrightness, RandomRotate, RandomErodeDilate, RandomSharpen, \
    RandomMirror, RandomFlip, RandomGaussianBlur, RandomSaltAndPepper, RandomDropBlock, RandomMosaic
from mltu.torch.model import Model
from mltu.torch.dataProvider import DataProvider
from mltu.torch.yolo.annotation import VOCAnnotationReader
from mltu.torch.yolo.preprocessors import YoloPreprocessor
from mltu.torch.yolo.loss import v8DetectionLoss
from mltu.torch.yolo.metrics import YoloMetrics
from mltu.torch.yolo.optimizer import build_optimizer, AccumulativeOptimizer
from mltu.torch.callbacks import EarlyStopping, ModelCheckpoint, ReduceLROnPlateau, TensorBoard, Model2onnx, WarmupCosineDecay

from ultralytics.nn.tasks import DetectionModel
from ultralytics.engine.model import Model as BaseModel


annotations_path = "Datasets/car-plate-detection/annotations"

dataset = [[None, os.path.join(annotations_path, f)] for f in os.listdir(annotations_path)]

# Make sure torch can see GPU device, it is not recommended to train with CPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

img_size = 416
labels = {0: "licence"}

# Create a data provider for the dataset
data_provider = DataProvider(
    dataset=dataset,
    skip_validation=True,
    batch_size=16,
    data_preprocessors=[
        VOCAnnotationReader(labels=labels),
        ImageReader(CVImage),
        ],
    transformers=[
        # ImageShowCV2(),
        ImageResizer(img_size, img_size),
        ImageNormalizer(transpose_axis=True),
        ],
    batch_postprocessors=[
        YoloPreprocessor(device, img_size)
        ],
    numpy=False,
)

# for b in data_provider:
#     pass

# split the dataset into train and test
train_data_provider, val_data_provider = data_provider.split(0.9, shuffle=False)

# Attaach augmentation to the train data provider
train_data_provider.augmentors = [
    RandomBrightness(),
    RandomErodeDilate(),
    RandomSharpen(),
    RandomMirror(),
    RandomFlip(),
    RandomGaussianBlur(),
    RandomSaltAndPepper(),
    RandomRotate(angle=10),
    RandomDropBlock(),
    RandomMosaic(),
]

# for batch in train_data_provider:
#     pass
#     print(batch)
#     break



base_model = BaseModel("yolov8n.pt")
# Create a YOLO model
model = DetectionModel('yolov8n.yaml', nc=len(labels))

try: model.load_state_dict(base_model.model.state_dict(), strict=False)
except: pass

model.to(device)

for k, v in model.named_parameters():
    if any(x in k for x in [".dfl"]):
        print("freezing", k)
        v.requires_grad = False
    elif not v.requires_grad:
        v.requires_grad = True

lr = 1e-3
optimizer = build_optimizer(model.model, name="AdamW", lr=lr, weight_decay=0.0, momentum=0.937, decay=0.0005)
optimizer = AccumulativeOptimizer(optimizer, 16, 64)

# create model object that will handle training and testing of the network
model = Model(
    model,
    optimizer,
    v8DetectionLoss(model),
    metrics=[YoloMetrics(nc=len(labels))],
    log_errors=False,
    output_path=f"Models/detector/{int(time.time())}",
    clip_grad_norm=10.0,
    ema=True,
)

modelCheckpoint = ModelCheckpoint(monitor="val_fitness", mode="max", save_best_only=True, verbose=True)
tensorBoard = TensorBoard()
earlyStopping = EarlyStopping(monitor="val_fitness", mode="max", patience=31, verbose=True)
model2onnx = Model2onnx(input_shape=(1, 3, img_size, img_size), verbose=True, opset_version=14,
                        dynamic_axes = {"input": {0: "batch_size", 2: "height", 3: "width"},
                                        "output": {0: "batch_size", 2: "anchors"}},
                        metadata={"classes": labels})
warmupCosineDecayBias = WarmupCosineDecay(lr_after_warmup=lr, final_lr=lr, initial_lr=0.1,
                                          warmup_steps=len(train_data_provider), warmup_epochs=10, ignore_param_groups=[1, 2]) # lr0
warmupCosineDecay = WarmupCosineDecay(lr_after_warmup=lr, final_lr=lr/10, initial_lr=1e-7,
                                      warmup_steps=len(train_data_provider), warmup_epochs=10, decay_epochs=190, ignore_param_groups=[0]) # lr1 and lr2

# Train the model
history = model.fit(
    train_data_provider,
    test_dataProvider=val_data_provider,
    epochs=200,
    callbacks=[
        modelCheckpoint,
        tensorBoard,
        earlyStopping,
        model2onnx,
        warmupCosineDecayBias,
        warmupCosineDecay
    ]
)
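
Since VOCAnnotationReader is constructed without images_path, the script relies on the reader resolving image locations from the <folder> tag inside each XML, which implies a layout roughly like the sketch below (illustrative only; the actual folder name depends on the dataset):

Datasets/car-plate-detection/
    annotations/          one Pascal VOC .xml file per image
    <folder from XML>/    the image files referenced by each annotation's <filename> tag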

mltu/transformers.py

Lines changed: 23 additions & 0 deletions

@@ -13,6 +13,7 @@
 - ImageResizer - Resize image to (width, height)
 - LabelIndexer - Convert label to index by vocab
 - LabelPadding - Pad label to max_word_length
+- ImageNormalizer - Normalize image to float value, transpose axis if necessary and convert to numpy
 - SpectrogramPadding - Pad spectrogram to max_spectrogram_length
 - AudioToSpectrogram - Convert Audio to Spectrogram
 - ImageShowCV2 - Show image for visual inspection
@@ -171,6 +172,28 @@ def __call__(self, data: np.ndarray, label: np.ndarray):
         return data, np.pad(label, (0, self.max_word_length - len(label)), "constant", constant_values=self.padding_value)
 
 
+class ImageNormalizer:
+    """ Normalize image to float value, transpose axis if necessary and convert to numpy
+    """
+    def __init__(self, transpose_axis: bool=False):
+        """ Initialize ImageNormalizer
+
+        Args:
+            transpose_axis (bool): Whether to transpose axis. Default: False
+        """
+        self.transpose_axis = transpose_axis
+
+    def __call__(self, image: Image, annotation: typing.Any) -> typing.Tuple[np.ndarray, typing.Any]:
+        """ Convert each Image to numpy, transpose axis ant normalize to float value
+        """
+        img = image.numpy() / 255.0
+
+        if self.transpose_axis:
+            img = img.transpose(2, 0, 1)
+
+        return img, annotation
+
+
 class SpectrogramPadding(Transformer):
     """Pad spectrogram to max_spectrogram_length
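
A short usage sketch (not part of the commit) showing the new transformer in a typical pipeline, mirroring the training script above:

# Resize first, then let ImageNormalizer scale pixels to [0, 1] floats and move the
# channel axis first (HWC -> CHW) for PyTorch models.
from mltu.transformers import ImageResizer, ImageNormalizer

transformers = [
    ImageResizer(416, 416),
    ImageNormalizer(transpose_axis=True),
]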
