|
6 | 6 |
|
7 | 7 | from . import Image
|
8 | 8 | from mltu.annotations.audio import Audio
|
9 |
| -from mltu.annotations.detections import Detections, Detection |
| 9 | +from mltu.annotations.detections import Detections, Detection, BboxType |
10 | 10 |
|
11 | 11 | """
|
12 | 12 | Implemented image augmentors:
|
|
20 | 20 | - RandomFlip
|
21 | 21 | - RandomDropBlock
|
22 | 22 | - RandomMosaic
|
| 23 | +- RandomZoom |
| 24 | +- RandomColorMode |
23 | 25 |
|
24 | 26 | Implemented audio augmentors:
|
25 | 27 | - RandomAudioNoise
|
@@ -379,21 +381,22 @@ def __init__(
|
379 | 381 | self,
|
380 | 382 | random_chance: float = 0.5,
|
381 | 383 | log_level: int = logging.INFO,
|
382 |
| - sigma: typing.Union[int, float] = 0.5, |
| 384 | + sigma: typing.Union[int, float] = 1.5, |
383 | 385 | augment_annotation: bool = False,
|
384 | 386 | ) -> None:
|
385 | 387 | """ Randomly erode and dilate image
|
386 | 388 |
|
387 | 389 | Args:
|
388 | 390 | random_chance (float): Float between 0.0 and 1.0 setting bounds for random probability. Defaults to 0.5.
|
389 | 391 | log_level (int): Log level for the augmentor. Defaults to logging.INFO.
|
390 |
| - sigma (int, float): standard deviation of the Gaussian kernel |
| 392 | + sigma (int, float): maximum sigma value for Gaussian blur. Defaults to 1.5. |
391 | 393 | """
|
392 | 394 | super(RandomGaussianBlur, self).__init__(random_chance, log_level, augment_annotation)
|
393 | 395 | self.sigma = sigma
|
394 | 396 |
|
395 | 397 | def augment(self, image: Image) -> Image:
|
396 |
| - img = cv2.GaussianBlur(image.numpy(), (0, 0), self.sigma) |
| 398 | + sigma = np.random.uniform(0, self.sigma) |
| 399 | + img = cv2.GaussianBlur(image.numpy(), (0, 0), sigma) |
397 | 400 |
|
398 | 401 | image.update(img)
|
399 | 402 |
|
@@ -716,6 +719,126 @@ def __call__(self, image: Image, annotation: typing.Any) -> typing.Tuple[Image,
|
716 | 719 | return image, annotation
|
717 | 720 |
|
718 | 721 |
|
class RandomZoom(Augmentor):
    """ Randomly zoom into an image by cropping it around the annotated detections """

    def __init__(
        self,
        random_chance: float = 0.5,
        log_level: int = logging.INFO,
        augment_annotation: bool = True,
        object_crop_percentage: float = 0.5,
    ) -> None:
        """ Randomly zoom into an image

        Args:
            random_chance (float): Float between 0.0 and 1.0 setting bounds for random probability. Defaults to 0.5.
            log_level (int): Log level for the augmentor. Defaults to logging.INFO.
            augment_annotation (bool): Whether to augment the annotation. Defaults to True.
            object_crop_percentage (float): Scales how far the crop may cut into the box enclosing all
                detections; each crop edge may intrude by up to 0.25 * object_crop_percentage of that
                box's width/height. Defaults to 0.5.
        """
        super(RandomZoom, self).__init__(random_chance, log_level, augment_annotation)
        self.object_crop_percentage = object_crop_percentage

    @randomness_decorator
    def __call__(self, image: Image, annotation: typing.Any) -> typing.Tuple[Image, typing.Any]:
        """ Randomly zoom an image

        The zoom is only applied when the annotation is a Detections object and annotation
        augmentation is enabled; otherwise image and annotation are returned untouched.

        Args:
            image (Image): Image to be used for zoom
            annotation (typing.Any): Annotation to be used for zoom

        Returns:
            image (Image): Zoomed image
            annotation (typing.Any): Zoomed annotation if necessary
        """
        if not (isinstance(annotation, Detections) and self._augment_annotation):
            return image, annotation

        dets = np.array([detection.xyxy for detection in annotation])
        if dets.size == 0:
            # No boxes to zoom around; column indexing below would raise IndexError
            return image, annotation

        # Box enclosing every detection; coordinates are treated as relative (0-1) fractions of the image
        min_left = np.min(dets[:, 0])
        min_top = np.min(dets[:, 1])
        max_right = np.max(dets[:, 2])
        max_bottom = np.max(dets[:, 3])

        # Largest distance each crop edge is allowed to cut into the enclosing box
        x_margin = 0.25 * (max_right - min_left) * self.object_crop_percentage
        y_margin = 0.25 * (max_bottom - min_top) * self.object_crop_percentage

        # Sampling order (xmin, ymin, xmax, ymax) is kept so seeded runs stay reproducible
        crop_xmin = np.random.uniform(0, min_left + x_margin)
        crop_ymin = np.random.uniform(0, min_top + y_margin)
        crop_xmax = np.random.uniform(max_right - x_margin, 1)
        crop_ymax = np.random.uniform(max_bottom - y_margin, 1)

        # Convert the relative crop window to pixel indices and crop the image
        crop_min_max = np.array([crop_xmin, crop_ymin, crop_xmax, crop_ymax])
        new_xyxy = (crop_min_max * np.array([image.width, image.height, image.width, image.height])).astype(int)
        new_image = image.numpy()[new_xyxy[1]:new_xyxy[3], new_xyxy[0]:new_xyxy[2]]
        image.update(new_image)

        # Re-express every box relative to the crop window
        crop_min_ratio = np.array([crop_xmin, crop_ymin, crop_xmin, crop_ymin])
        crop_max_ratio = np.array([crop_xmax, crop_ymax, crop_xmax, crop_ymax])
        new_dets = (dets - crop_min_ratio) / (crop_max_ratio - crop_min_ratio)
        # The crop may cut into boxes, so keep the relative coordinates inside the new image bounds
        new_dets = np.clip(new_dets, 0.0, 1.0)

        detections = []
        for detection, new_det in zip(annotation, new_dets):
            new_detection = Detection(
                new_det,
                label=detection.label,
                labels=detection.labels,
                confidence=detection.confidence,
                image_path=detection.image_path,
                width=image.width,
                height=image.height,
                relative=True,
                bbox_type=BboxType.XYXY,
            )

            detections.append(new_detection)

        annotation = Detections(
            labels=annotation.labels,
            width=image.width,
            height=image.height,
            detections=detections,
        )

        return image, annotation
| 803 | + |
| 804 | + |
class RandomColorMode(Augmentor):
    """ Randomly convert an image with one of several OpenCV BGR color conversions """

    def __init__(
        self,
        random_chance: float = 0.5,
        log_level: int = logging.INFO,
        augment_annotation: bool = False,
    ) -> None:
        """ Randomly change color mode of an image

        Args:
            random_chance (float): Float between 0.0 and 1.0 setting bounds for random probability. Defaults to 0.5.
            log_level (int): Log level for the augmentor. Defaults to logging.INFO.
            augment_annotation (bool): Whether to augment the annotation. Defaults to False.
        """
        super().__init__(random_chance, log_level, augment_annotation)

    @randomness_decorator
    def __call__(self, image: Image, annotation: typing.Any) -> typing.Tuple[Image, typing.Any]:
        """ Apply one randomly picked color conversion to the image

        Args:
            image (Image): Image whose pixel data is converted
            annotation (typing.Any): Annotation accompanying the image; passed through as-is

        Returns:
            image (Image): Image updated with the converted pixel data
            annotation (typing.Any): The annotation that was passed in
        """
        conversion_codes = [
            cv2.COLOR_BGR2GRAY,
            cv2.COLOR_BGR2HSV,
            cv2.COLOR_BGR2LAB,
            cv2.COLOR_BGR2YCrCb,
            cv2.COLOR_BGR2RGB,
        ]
        chosen_code = np.random.choice(conversion_codes)

        converted = cv2.cvtColor(image.numpy(), chosen_code)
        if chosen_code == cv2.COLOR_BGR2GRAY:
            # Expand the single-channel grayscale result back to three channels
            converted = cv2.cvtColor(converted, cv2.COLOR_GRAY2BGR)

        image.update(converted)
        return image, annotation
| 840 | + |
| 841 | + |
719 | 842 | class RandomAudioNoise(Augmentor):
|
720 | 843 | """ Randomly add noise to audio
|
721 | 844 |
|
|
0 commit comments