diff --git a/sahi/utils/coco.py b/sahi/utils/coco.py index 5fe15518..c68fa207 100644 --- a/sahi/utils/coco.py +++ b/sahi/utils/coco.py @@ -14,7 +14,7 @@ import numpy as np from tqdm import tqdm -from sahi.utils.file import load_json, save_json +from sahi.utils.file import is_colab, load_json, save_json from sahi.utils.shapely import ShapelyAnnotation, box, get_shapely_multipolygon logger = logging.getLogger(__name__) @@ -1225,7 +1225,7 @@ def split_coco_as_train_val(self, train_split_rate=0.9, numpy_seed=0): "val_coco": val_coco, } - def export_as_yolov5(self, output_dir, train_split_rate=1, numpy_seed=0, mp=False): + def export_as_yolov5(self, output_dir, train_split_rate=1, numpy_seed=0, mp=False, disable_symlink=False): """ Exports current COCO dataset in ultralytics/yolov5 format. Creates train val folders with image symlinks and txt files and a data yaml file. @@ -1242,6 +1242,8 @@ def export_as_yolov5(self, output_dir, train_split_rate=1, numpy_seed=0, mp=Fals mp: bool If True, multiprocess mode is on. Should be called in 'if __name__ == __main__:' block. + disable_symlink: bool + If True, symlinks will not be created. Instead, images will be copied. """ try: import yaml @@ -1292,6 +1294,7 @@ def export_as_yolov5(self, output_dir, train_split_rate=1, numpy_seed=0, mp=Fals coco=train_coco, ignore_negative_samples=self.ignore_negative_samples, mp=mp, + disable_symlink=disable_symlink, ) if split_mode in ["TRAINVAL", "VAL"]: export_yolov5_images_and_txts_from_coco_object( @@ -1299,6 +1302,7 @@ def export_as_yolov5(self, output_dir, train_split_rate=1, numpy_seed=0, mp=Fals coco=val_coco, ignore_negative_samples=self.ignore_negative_samples, mp=mp, + disable_symlink=disable_symlink, ) # create yolov5 data yaml @@ -1503,7 +1507,9 @@ def get_coco_with_clipped_bboxes(self): return coco -def export_yolov5_images_and_txts_from_coco_object(output_dir, coco, ignore_negative_samples=False, mp=False): +def export_yolov5_images_and_txts_from_coco_object( + output_dir, coco, ignore_negative_samples=False, mp=False, disable_symlink=False +): """ Creates image symlinks and annotation txts in yolo format from coco dataset. @@ -1517,12 +1523,20 @@ def export_yolov5_images_and_txts_from_coco_object(output_dir, coco, ignore_nega mp: bool If True, multiprocess mode is on. Should be called in 'if __name__ == __main__:' block. + disable_symlink: bool + If True, symlinks are not created. Instead images are copied. """ - - print("generating image symlinks and annotation files for yolov5..."), + logger.info("generating image symlinks and annotation files for yolov5..."), + # symlink is not supported in colab + if is_colab() and not disable_symlink: + logger.warning("symlink is not supported in colab, disabling it...") + disable_symlink = True if mp: with Pool(processes=48) as pool: - args = [(coco_image, coco.image_dir, output_dir, ignore_negative_samples) for coco_image in coco.images] + args = [ + (coco_image, coco.image_dir, output_dir, ignore_negative_samples, disable_symlink) + for coco_image in coco.images + ] pool.starmap( export_single_yolov5_image_and_corresponding_txt, tqdm(args, total=len(args)), @@ -1530,7 +1544,7 @@ def export_yolov5_images_and_txts_from_coco_object(output_dir, coco, ignore_nega else: for coco_image in tqdm(coco.images): export_single_yolov5_image_and_corresponding_txt( - coco_image, coco.image_dir, output_dir, ignore_negative_samples + coco_image, coco.image_dir, output_dir, ignore_negative_samples, disable_symlink ) @@ -1548,67 +1562,77 @@ def export_single_yolov5_image_and_corresponding_txt( ignore_negative_samples: bool If True ignores images without annotations in all operations. """ - if not ignore_negative_samples or len(coco_image.annotations) > 0: - # skip images without suffix - # https://github.com/obss/sahi/issues/114 - if Path(coco_image.file_name).suffix == "": - print(f"image file has no suffix, skipping it: '{coco_image.file_name}'") - return - elif Path(coco_image.file_name).suffix in [".txt"]: # TODO: extend this list - print(f"image file has incorrect suffix, skipping it: '{coco_image.file_name}'") - return - # set coco and yolo image paths - if Path(coco_image.file_name).is_file(): - coco_image_path = os.path.abspath(coco_image.file_name) - else: - if coco_image_dir is None: - raise ValueError("You have to specify image_dir of Coco object for yolov5 conversion.") - - coco_image_path = os.path.abspath(str(Path(coco_image_dir) / coco_image.file_name)) - - yolo_image_path_temp = str(Path(output_dir) / Path(coco_image.file_name).name) - # increment target file name if already present - yolo_image_path = copy.deepcopy(yolo_image_path_temp) - name_increment = 2 - while Path(yolo_image_path).is_file(): - parent_dir = Path(yolo_image_path_temp).parent - filename = Path(yolo_image_path_temp).stem - filesuffix = Path(yolo_image_path_temp).suffix - filename = filename + "_" + str(name_increment) - yolo_image_path = str(parent_dir / (filename + filesuffix)) - name_increment += 1 - # create a symbolic link pointing to coco_image_path named yolo_image_path - if disable_symlink: - import shutil - - shutil.copy(coco_image_path, yolo_image_path) - else: - os.symlink(coco_image_path, yolo_image_path) - # calculate annotation normalization ratios - width = coco_image.width - height = coco_image.height - dw = 1.0 / (width) - dh = 1.0 / (height) - # set annotation filepath - image_file_suffix = Path(yolo_image_path).suffix - yolo_annotation_path = yolo_image_path.replace(image_file_suffix, ".txt") - # create annotation file - annotations = coco_image.annotations - with open(yolo_annotation_path, "w") as outfile: - for annotation in annotations: - # convert coco bbox to yolo bbox - x_center = annotation.bbox[0] + annotation.bbox[2] / 2.0 - y_center = annotation.bbox[1] + annotation.bbox[3] / 2.0 - bbox_width = annotation.bbox[2] - bbox_height = annotation.bbox[3] - x_center = x_center * dw - y_center = y_center * dh - bbox_width = bbox_width * dw - bbox_height = bbox_height * dh - category_id = annotation.category_id - yolo_bbox = (x_center, y_center, bbox_width, bbox_height) - # save yolo annotation - outfile.write(str(category_id) + " " + " ".join([str(value) for value in yolo_bbox]) + "\n") + # if coco_image contains any invalid annotations, skip it + contains_invalid_annotations = False + for coco_annotation in coco_image.annotations: + if len(coco_annotation.bbox) != 4: + contains_invalid_annotations = True + break + if contains_invalid_annotations: + return + # skip images without annotations + if len(coco_image.annotations) == 0 and ignore_negative_samples: + return + # skip images without suffix + # https://github.com/obss/sahi/issues/114 + if Path(coco_image.file_name).suffix == "": + print(f"image file has no suffix, skipping it: '{coco_image.file_name}'") + return + elif Path(coco_image.file_name).suffix in [".txt"]: # TODO: extend this list + print(f"image file has incorrect suffix, skipping it: '{coco_image.file_name}'") + return + # set coco and yolo image paths + if Path(coco_image.file_name).is_file(): + coco_image_path = os.path.abspath(coco_image.file_name) + else: + if coco_image_dir is None: + raise ValueError("You have to specify image_dir of Coco object for yolov5 conversion.") + + coco_image_path = os.path.abspath(str(Path(coco_image_dir) / coco_image.file_name)) + + yolo_image_path_temp = str(Path(output_dir) / Path(coco_image.file_name).name) + # increment target file name if already present + yolo_image_path = copy.deepcopy(yolo_image_path_temp) + name_increment = 2 + while Path(yolo_image_path).is_file(): + parent_dir = Path(yolo_image_path_temp).parent + filename = Path(yolo_image_path_temp).stem + filesuffix = Path(yolo_image_path_temp).suffix + filename = filename + "_" + str(name_increment) + yolo_image_path = str(parent_dir / (filename + filesuffix)) + name_increment += 1 + # create a symbolic link pointing to coco_image_path named yolo_image_path + if disable_symlink: + import shutil + + shutil.copy(coco_image_path, yolo_image_path) + else: + os.symlink(coco_image_path, yolo_image_path) + # calculate annotation normalization ratios + width = coco_image.width + height = coco_image.height + dw = 1.0 / (width) + dh = 1.0 / (height) + # set annotation filepath + image_file_suffix = Path(yolo_image_path).suffix + yolo_annotation_path = yolo_image_path.replace(image_file_suffix, ".txt") + # create annotation file + annotations = coco_image.annotations + with open(yolo_annotation_path, "w") as outfile: + for annotation in annotations: + # convert coco bbox to yolo bbox + x_center = annotation.bbox[0] + annotation.bbox[2] / 2.0 + y_center = annotation.bbox[1] + annotation.bbox[3] / 2.0 + bbox_width = annotation.bbox[2] + bbox_height = annotation.bbox[3] + x_center = x_center * dw + y_center = y_center * dh + bbox_width = bbox_width * dw + bbox_height = bbox_height * dh + category_id = annotation.category_id + yolo_bbox = (x_center, y_center, bbox_width, bbox_height) + # save yolo annotation + outfile.write(str(category_id) + " " + " ".join([str(value) for value in yolo_bbox]) + "\n") def update_categories(desired_name2id: dict, coco_dict: dict) -> dict: @@ -2270,7 +2294,12 @@ def remove_invalid_coco_results(result_list_or_path: Union[List, str], dataset_d def export_coco_as_yolov5( - output_dir: str, train_coco: Coco = None, val_coco: Coco = None, train_split_rate: float = 0.9, numpy_seed=0 + output_dir: str, + train_coco: Coco = None, + val_coco: Coco = None, + train_split_rate: float = 0.9, + numpy_seed=0, + disable_symlink=False, ): """ Exports current COCO dataset in ultralytics/yolov5 format. @@ -2287,6 +2316,8 @@ def export_coco_as_yolov5( train split rate between 0 and 1. will be used when val_coco is None. numpy_seed: int To fix the numpy seed. + disable_symlink: bool + If True, copy images instead of creating symlinks. Returns: yaml_path: str @@ -2330,12 +2361,14 @@ def export_coco_as_yolov5( coco=train_coco, ignore_negative_samples=train_coco.ignore_negative_samples, mp=False, + disable_symlink=disable_symlink, ) export_yolov5_images_and_txts_from_coco_object( output_dir=val_dir, coco=val_coco, ignore_negative_samples=val_coco.ignore_negative_samples, mp=False, + disable_symlink=disable_symlink, ) # create yolov5 data yaml @@ -2352,7 +2385,9 @@ def export_coco_as_yolov5( return yaml_path -def export_coco_as_yolov5_via_yml(yml_path: str, output_dir: str, train_split_rate: float = 0.9, numpy_seed=0): +def export_coco_as_yolov5_via_yml( + yml_path: str, output_dir: str, train_split_rate: float = 0.9, numpy_seed=0, disable_symlink=False +): """ Exports current COCO dataset in ultralytics/yolov5 format. Creates train val folders with image symlinks and txt files and a data yaml file. @@ -2371,6 +2406,8 @@ def export_coco_as_yolov5_via_yml(yml_path: str, output_dir: str, train_split_ra train split rate between 0 and 1. will be used when val_json_path is None. numpy_seed: int To fix the numpy seed. + disable_symlink: bool + If True, copy images instead of creating symlinks. Returns: yaml_path: str @@ -2406,6 +2443,7 @@ def export_coco_as_yolov5_via_yml(yml_path: str, output_dir: str, train_split_ra val_coco=val_coco, train_split_rate=train_split_rate, numpy_seed=numpy_seed, + disable_symlink=disable_symlink, ) return yaml_path diff --git a/sahi/utils/file.py b/sahi/utils/file.py index 8c1afc47..ec09a607 100644 --- a/sahi/utils/file.py +++ b/sahi/utils/file.py @@ -236,3 +236,10 @@ def download_from_url(from_url: str, to_path: str): from_url, to_path, ) + + +def is_colab(): + import sys + + # Is environment a Google Colab instance? + return "google.colab" in sys.modules