Skip to content

Commit 4d085bc

Browse files
committed
Merge branch 'develop'
2 parents 506c06a + 6ba4798 commit 4d085bc

File tree

4 files changed

+142
-7
lines changed

4 files changed

+142
-7
lines changed

CHANGELOG.md

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,11 @@
1+
## [1.2.2] - 2024-03-15
2+
### Changed
3+
- Fixed a bug with the `loss_info` local variable in the `mltu.torch.model.Model` object
4+
5+
### Added
6+
- Added `RandomColorMode` and `RandomZoom` to `mltu.augmentors`
7+
8+
19
## [1.2.1] - 2024-03-12
210
### Changed
311
- Fixed many minor bugs

mltu/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
__version__ = "1.2.1"
1+
__version__ = "1.2.2"
22

33
from .annotations.images import Image
44
from .annotations.images import CVImage

mltu/augmentors.py

Lines changed: 127 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66

77
from . import Image
88
from mltu.annotations.audio import Audio
9-
from mltu.annotations.detections import Detections, Detection
9+
from mltu.annotations.detections import Detections, Detection, BboxType
1010

1111
"""
1212
Implemented image augmentors:
@@ -20,6 +20,8 @@
2020
- RandomFlip
2121
- RandomDropBlock
2222
- RandomMosaic
23+
- RandomZoom
24+
- RandomColorMode
2325
2426
Implemented audio augmentors:
2527
- RandomAudioNoise
@@ -379,21 +381,22 @@ def __init__(
379381
self,
380382
random_chance: float = 0.5,
381383
log_level: int = logging.INFO,
382-
sigma: typing.Union[int, float] = 0.5,
384+
sigma: typing.Union[int, float] = 1.5,
383385
augment_annotation: bool = False,
384386
) -> None:
385387
""" Randomly erode and dilate image
386388
387389
Args:
388390
random_chance (float): Float between 0.0 and 1.0 setting bounds for random probability. Defaults to 0.5.
389391
log_level (int): Log level for the augmentor. Defaults to logging.INFO.
390-
sigma (int, float): standard deviation of the Gaussian kernel
392+
sigma (int, float): maximum sigma value for Gaussian blur. Defaults to 1.5.
391393
"""
392394
super(RandomGaussianBlur, self).__init__(random_chance, log_level, augment_annotation)
393395
self.sigma = sigma
394396

395397
def augment(self, image: Image) -> Image:
396-
img = cv2.GaussianBlur(image.numpy(), (0, 0), self.sigma)
398+
sigma = np.random.uniform(0, self.sigma)
399+
img = cv2.GaussianBlur(image.numpy(), (0, 0), sigma)
397400

398401
image.update(img)
399402

@@ -716,6 +719,126 @@ def __call__(self, image: Image, annotation: typing.Any) -> typing.Tuple[Image,
716719
return image, annotation
717720

718721

722+
class RandomZoom(Augmentor):
    """ Randomly zoom into an image by cropping it around the annotated detections

    The crop window is drawn at random around the union bounding box of all
    detections, and the detections are re-expressed relative to the cropped image.
    """
    def __init__(
        self,
        random_chance: float = 0.5,
        log_level: int = logging.INFO,
        augment_annotation: bool = True,
        object_crop_percentage: float = 0.5,
    ) -> None:
        """ Randomly zoom into an image

        Args:
            random_chance (float): Float between 0.0 and 1.0 setting bounds for random probability. Defaults to 0.5.
            log_level (int): Log level for the augmentor. Defaults to logging.INFO.
            augment_annotation (bool): Whether to augment the annotation. Defaults to True.
            object_crop_percentage (float): Scales how far each crop edge may cut into the union box of all
                detections: up to 0.25 * object_crop_percentage of the box width/height per side. Defaults to 0.5.
        """
        super(RandomZoom, self).__init__(random_chance, log_level, augment_annotation)
        self.object_crop_percentage = object_crop_percentage

    @randomness_decorator
    def __call__(self, image: Image, annotation: typing.Any) -> typing.Tuple[Image, typing.Any]:
        """ Randomly zoom an image

        The zoom is only applied when the annotation is a Detections object and
        annotation augmentation is enabled; otherwise the inputs are returned unchanged.

        Args:
            image (Image): Image to be used for zoom
            annotation (typing.Any): Annotation to be used for zoom

        Returns:
            image (Image): Zoomed image
            annotation (typing.Any): Zoomed annotation if necessary
        """
        if not (isinstance(annotation, Detections) and self._augment_annotation):
            return image, annotation

        dets = np.array([detection.xyxy for detection in annotation])
        if dets.size == 0:
            # No detections: there is no object to zoom around, and indexing
            # dets[:, 0] on an empty array would raise IndexError.
            return image, annotation

        # Union bounding box of all detections.
        # NOTE(review): assumes detection.xyxy is in relative [0, 1] coordinates - confirm against Detection.
        min_left = np.min(dets[:, 0])
        min_top = np.min(dets[:, 1])
        max_right = np.max(dets[:, 2])
        max_bottom = np.max(dets[:, 3])

        # Calculate the size of the object
        object_width = max_right - min_left
        object_height = max_bottom - min_top

        # Each crop edge is sampled between the image border and a point slightly inside the union box
        x_margin = 0.25 * object_width * self.object_crop_percentage
        y_margin = 0.25 * object_height * self.object_crop_percentage
        crop_xmin = np.random.uniform(0, min_left + x_margin)
        crop_ymin = np.random.uniform(0, min_top + y_margin)
        crop_xmax = np.random.uniform(max_right - x_margin, 1)
        crop_ymax = np.random.uniform(max_bottom - y_margin, 1)

        crop_min_max = np.array([crop_xmin, crop_ymin, crop_xmax, crop_ymax])
        new_xyxy = (crop_min_max * np.array([image.width, image.height, image.width, image.height])).astype(int)
        if new_xyxy[2] <= new_xyxy[0] or new_xyxy[3] <= new_xyxy[1]:
            # Integer rounding collapsed the crop window to zero pixels; keep the original image
            return image, annotation

        new_image = image.numpy()[new_xyxy[1]:new_xyxy[3], new_xyxy[0]:new_xyxy[2]]
        image.update(new_image)

        # Re-express the detections relative to the crop window
        crop_min_ratio = np.array([crop_xmin, crop_ymin, crop_xmin, crop_ymin])
        crop_max_ratio = np.array([crop_xmax, crop_ymax, crop_xmax, crop_ymax])
        new_dets = (dets - crop_min_ratio) / (crop_max_ratio - crop_min_ratio)

        detections = [
            Detection(
                new_det,
                label=detection.label,
                labels=detection.labels,
                confidence=detection.confidence,
                image_path=detection.image_path,
                width=image.width,
                height=image.height,
                relative=True,
                bbox_type=BboxType.XYXY,
            )
            for detection, new_det in zip(annotation, new_dets)
        ]

        annotation = Detections(
            labels=annotation.labels,
            width=image.width,
            height=image.height,
            detections=detections,
        )

        return image, annotation
803+
804+
805+
class RandomColorMode(Augmentor):
    """ Randomly convert an image into another color space """
    def __init__(
        self,
        random_chance: float = 0.5,
        log_level: int = logging.INFO,
        augment_annotation: bool = False,
    ) -> None:
        """ Randomly change color mode of an image

        Args:
            random_chance (float): Float between 0.0 and 1.0 setting bounds for random probability. Defaults to 0.5.
            log_level (int): Log level for the augmentor. Defaults to logging.INFO.
            augment_annotation (bool): Whether to augment the annotation. Defaults to False.
        """
        super().__init__(random_chance, log_level, augment_annotation)

    @randomness_decorator
    def __call__(self, image: Image, annotation: typing.Any) -> typing.Tuple[Image, typing.Any]:
        """ Convert the image with a randomly picked BGR color conversion

        Args:
            image (Image): Image whose color mode is changed
            annotation (typing.Any): Annotation paired with the image; returned as received

        Returns:
            image (Image): Image updated with the converted pixels
            annotation (typing.Any): The annotation that was passed in
        """
        conversion_codes = [
            cv2.COLOR_BGR2GRAY,
            cv2.COLOR_BGR2HSV,
            cv2.COLOR_BGR2LAB,
            cv2.COLOR_BGR2YCrCb,
            cv2.COLOR_BGR2RGB,
        ]
        chosen_code = np.random.choice(conversion_codes)
        converted = cv2.cvtColor(image.numpy(), chosen_code)

        # The grayscale result is converted back with GRAY2BGR before updating the image
        if chosen_code == cv2.COLOR_BGR2GRAY:
            converted = cv2.cvtColor(converted, cv2.COLOR_GRAY2BGR)

        image.update(converted)
        return image, annotation
840+
841+
719842
class RandomAudioNoise(Augmentor):
720843
""" Randomly add noise to audio
721844

mltu/torch/model.py

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -177,13 +177,15 @@ def toDevice(self, data: np.ndarray, target: np.ndarray) -> typing.Tuple[torch.T
177177
def train_step(
178178
self,
179179
data: typing.Union[np.ndarray, torch.Tensor],
180-
target: typing.Union[np.ndarray, torch.Tensor]
180+
target: typing.Union[np.ndarray, torch.Tensor],
181+
loss_info: dict = {}
181182
) -> torch.Tensor:
182183
""" Perform one training step
183184
184185
Args:
185186
data (typing.Union[np.ndarray, torch.Tensor]): training data
186187
target (typing.Union[np.ndarray, torch.Tensor]): training target
188+
loss_info (dict, optional): additional loss information. Defaults to {}.
187189
188190
Returns:
189191
torch.Tensor: loss
@@ -228,13 +230,15 @@ def train_step(
228230
def test_step(
229231
self,
230232
data: typing.Union[np.ndarray, torch.Tensor],
231-
target: typing.Union[np.ndarray, torch.Tensor]
233+
target: typing.Union[np.ndarray, torch.Tensor],
234+
loss_info: dict = {}
232235
) -> torch.Tensor:
233236
""" Perform one validation step
234237
235238
Args:
236239
data (typing.Union[np.ndarray, torch.Tensor]): validation data
237240
target (typing.Union[np.ndarray, torch.Tensor]): validation target
241+
loss_info (dict, optional): additional loss information. Defaults to {}.
238242
239243
Returns:
240244
torch.Tensor: loss

0 commit comments

Comments
 (0)