Skip to content

Commit

Permalink
improve coco to yolov5 conversion (#805)
Browse files Browse the repository at this point in the history
  • Loading branch information
fcakyon authored Jan 5, 2023
1 parent 0f3cbcb commit 422b287
Show file tree
Hide file tree
Showing 2 changed files with 115 additions and 70 deletions.
178 changes: 108 additions & 70 deletions sahi/utils/coco.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
import numpy as np
from tqdm import tqdm

from sahi.utils.file import load_json, save_json
from sahi.utils.file import is_colab, load_json, save_json
from sahi.utils.shapely import ShapelyAnnotation, box, get_shapely_multipolygon

logger = logging.getLogger(__name__)
Expand Down Expand Up @@ -1225,7 +1225,7 @@ def split_coco_as_train_val(self, train_split_rate=0.9, numpy_seed=0):
"val_coco": val_coco,
}

def export_as_yolov5(self, output_dir, train_split_rate=1, numpy_seed=0, mp=False):
def export_as_yolov5(self, output_dir, train_split_rate=1, numpy_seed=0, mp=False, disable_symlink=False):
"""
Exports current COCO dataset in ultralytics/yolov5 format.
Creates train val folders with image symlinks and txt files and a data yaml file.
Expand All @@ -1242,6 +1242,8 @@ def export_as_yolov5(self, output_dir, train_split_rate=1, numpy_seed=0, mp=Fals
mp: bool
If True, multiprocess mode is on.
Should be called in 'if __name__ == __main__:' block.
disable_symlink: bool
If True, symlinks will not be created. Instead, images will be copied.
"""
try:
import yaml
Expand Down Expand Up @@ -1292,13 +1294,15 @@ def export_as_yolov5(self, output_dir, train_split_rate=1, numpy_seed=0, mp=Fals
coco=train_coco,
ignore_negative_samples=self.ignore_negative_samples,
mp=mp,
disable_symlink=disable_symlink,
)
if split_mode in ["TRAINVAL", "VAL"]:
export_yolov5_images_and_txts_from_coco_object(
output_dir=val_dir,
coco=val_coco,
ignore_negative_samples=self.ignore_negative_samples,
mp=mp,
disable_symlink=disable_symlink,
)

# create yolov5 data yaml
Expand Down Expand Up @@ -1503,7 +1507,9 @@ def get_coco_with_clipped_bboxes(self):
return coco


def export_yolov5_images_and_txts_from_coco_object(output_dir, coco, ignore_negative_samples=False, mp=False):
def export_yolov5_images_and_txts_from_coco_object(
output_dir, coco, ignore_negative_samples=False, mp=False, disable_symlink=False
):
"""
Creates image symlinks and annotation txts in yolo format from coco dataset.
Expand All @@ -1517,20 +1523,28 @@ def export_yolov5_images_and_txts_from_coco_object(output_dir, coco, ignore_nega
mp: bool
If True, multiprocess mode is on.
Should be called in 'if __name__ == __main__:' block.
disable_symlink: bool
If True, symlinks are not created. Instead images are copied.
"""

print("generating image symlinks and annotation files for yolov5..."),
logger.info("generating image symlinks and annotation files for yolov5..."),
# symlink is not supported in colab
if is_colab() and not disable_symlink:
logger.warning("symlink is not supported in colab, disabling it...")
disable_symlink = True
if mp:
with Pool(processes=48) as pool:
args = [(coco_image, coco.image_dir, output_dir, ignore_negative_samples) for coco_image in coco.images]
args = [
(coco_image, coco.image_dir, output_dir, ignore_negative_samples, disable_symlink)
for coco_image in coco.images
]
pool.starmap(
export_single_yolov5_image_and_corresponding_txt,
tqdm(args, total=len(args)),
)
else:
for coco_image in tqdm(coco.images):
export_single_yolov5_image_and_corresponding_txt(
coco_image, coco.image_dir, output_dir, ignore_negative_samples
coco_image, coco.image_dir, output_dir, ignore_negative_samples, disable_symlink
)


Expand All @@ -1548,67 +1562,77 @@ def export_single_yolov5_image_and_corresponding_txt(
ignore_negative_samples: bool
If True ignores images without annotations in all operations.
"""
if not ignore_negative_samples or len(coco_image.annotations) > 0:
# skip images without suffix
# https://github.com/obss/sahi/issues/114
if Path(coco_image.file_name).suffix == "":
print(f"image file has no suffix, skipping it: '{coco_image.file_name}'")
return
elif Path(coco_image.file_name).suffix in [".txt"]: # TODO: extend this list
print(f"image file has incorrect suffix, skipping it: '{coco_image.file_name}'")
return
# set coco and yolo image paths
if Path(coco_image.file_name).is_file():
coco_image_path = os.path.abspath(coco_image.file_name)
else:
if coco_image_dir is None:
raise ValueError("You have to specify image_dir of Coco object for yolov5 conversion.")

coco_image_path = os.path.abspath(str(Path(coco_image_dir) / coco_image.file_name))

yolo_image_path_temp = str(Path(output_dir) / Path(coco_image.file_name).name)
# increment target file name if already present
yolo_image_path = copy.deepcopy(yolo_image_path_temp)
name_increment = 2
while Path(yolo_image_path).is_file():
parent_dir = Path(yolo_image_path_temp).parent
filename = Path(yolo_image_path_temp).stem
filesuffix = Path(yolo_image_path_temp).suffix
filename = filename + "_" + str(name_increment)
yolo_image_path = str(parent_dir / (filename + filesuffix))
name_increment += 1
# create a symbolic link pointing to coco_image_path named yolo_image_path
if disable_symlink:
import shutil

shutil.copy(coco_image_path, yolo_image_path)
else:
os.symlink(coco_image_path, yolo_image_path)
# calculate annotation normalization ratios
width = coco_image.width
height = coco_image.height
dw = 1.0 / (width)
dh = 1.0 / (height)
# set annotation filepath
image_file_suffix = Path(yolo_image_path).suffix
yolo_annotation_path = yolo_image_path.replace(image_file_suffix, ".txt")
# create annotation file
annotations = coco_image.annotations
with open(yolo_annotation_path, "w") as outfile:
for annotation in annotations:
# convert coco bbox to yolo bbox
x_center = annotation.bbox[0] + annotation.bbox[2] / 2.0
y_center = annotation.bbox[1] + annotation.bbox[3] / 2.0
bbox_width = annotation.bbox[2]
bbox_height = annotation.bbox[3]
x_center = x_center * dw
y_center = y_center * dh
bbox_width = bbox_width * dw
bbox_height = bbox_height * dh
category_id = annotation.category_id
yolo_bbox = (x_center, y_center, bbox_width, bbox_height)
# save yolo annotation
outfile.write(str(category_id) + " " + " ".join([str(value) for value in yolo_bbox]) + "\n")
# if coco_image contains any invalid annotations, skip it
contains_invalid_annotations = False
for coco_annotation in coco_image.annotations:
if len(coco_annotation.bbox) != 4:
contains_invalid_annotations = True
break
if contains_invalid_annotations:
return
# skip images without annotations
if len(coco_image.annotations) == 0 and ignore_negative_samples:
return
# skip images without suffix
# https://github.com/obss/sahi/issues/114
if Path(coco_image.file_name).suffix == "":
print(f"image file has no suffix, skipping it: '{coco_image.file_name}'")
return
elif Path(coco_image.file_name).suffix in [".txt"]: # TODO: extend this list
print(f"image file has incorrect suffix, skipping it: '{coco_image.file_name}'")
return
# set coco and yolo image paths
if Path(coco_image.file_name).is_file():
coco_image_path = os.path.abspath(coco_image.file_name)
else:
if coco_image_dir is None:
raise ValueError("You have to specify image_dir of Coco object for yolov5 conversion.")

coco_image_path = os.path.abspath(str(Path(coco_image_dir) / coco_image.file_name))

yolo_image_path_temp = str(Path(output_dir) / Path(coco_image.file_name).name)
# increment target file name if already present
yolo_image_path = copy.deepcopy(yolo_image_path_temp)
name_increment = 2
while Path(yolo_image_path).is_file():
parent_dir = Path(yolo_image_path_temp).parent
filename = Path(yolo_image_path_temp).stem
filesuffix = Path(yolo_image_path_temp).suffix
filename = filename + "_" + str(name_increment)
yolo_image_path = str(parent_dir / (filename + filesuffix))
name_increment += 1
# create a symbolic link pointing to coco_image_path named yolo_image_path
if disable_symlink:
import shutil

shutil.copy(coco_image_path, yolo_image_path)
else:
os.symlink(coco_image_path, yolo_image_path)
# calculate annotation normalization ratios
width = coco_image.width
height = coco_image.height
dw = 1.0 / (width)
dh = 1.0 / (height)
# set annotation filepath
image_file_suffix = Path(yolo_image_path).suffix
yolo_annotation_path = yolo_image_path.replace(image_file_suffix, ".txt")
# create annotation file
annotations = coco_image.annotations
with open(yolo_annotation_path, "w") as outfile:
for annotation in annotations:
# convert coco bbox to yolo bbox
x_center = annotation.bbox[0] + annotation.bbox[2] / 2.0
y_center = annotation.bbox[1] + annotation.bbox[3] / 2.0
bbox_width = annotation.bbox[2]
bbox_height = annotation.bbox[3]
x_center = x_center * dw
y_center = y_center * dh
bbox_width = bbox_width * dw
bbox_height = bbox_height * dh
category_id = annotation.category_id
yolo_bbox = (x_center, y_center, bbox_width, bbox_height)
# save yolo annotation
outfile.write(str(category_id) + " " + " ".join([str(value) for value in yolo_bbox]) + "\n")


def update_categories(desired_name2id: dict, coco_dict: dict) -> dict:
Expand Down Expand Up @@ -2270,7 +2294,12 @@ def remove_invalid_coco_results(result_list_or_path: Union[List, str], dataset_d


def export_coco_as_yolov5(
output_dir: str, train_coco: Coco = None, val_coco: Coco = None, train_split_rate: float = 0.9, numpy_seed=0
output_dir: str,
train_coco: Coco = None,
val_coco: Coco = None,
train_split_rate: float = 0.9,
numpy_seed=0,
disable_symlink=False,
):
"""
Exports current COCO dataset in ultralytics/yolov5 format.
Expand All @@ -2287,6 +2316,8 @@ def export_coco_as_yolov5(
train split rate between 0 and 1. will be used when val_coco is None.
numpy_seed: int
To fix the numpy seed.
disable_symlink: bool
If True, copy images instead of creating symlinks.
Returns:
yaml_path: str
Expand Down Expand Up @@ -2330,12 +2361,14 @@ def export_coco_as_yolov5(
coco=train_coco,
ignore_negative_samples=train_coco.ignore_negative_samples,
mp=False,
disable_symlink=disable_symlink,
)
export_yolov5_images_and_txts_from_coco_object(
output_dir=val_dir,
coco=val_coco,
ignore_negative_samples=val_coco.ignore_negative_samples,
mp=False,
disable_symlink=disable_symlink,
)

# create yolov5 data yaml
Expand All @@ -2352,7 +2385,9 @@ def export_coco_as_yolov5(
return yaml_path


def export_coco_as_yolov5_via_yml(yml_path: str, output_dir: str, train_split_rate: float = 0.9, numpy_seed=0):
def export_coco_as_yolov5_via_yml(
yml_path: str, output_dir: str, train_split_rate: float = 0.9, numpy_seed=0, disable_symlink=False
):
"""
Exports current COCO dataset in ultralytics/yolov5 format.
Creates train val folders with image symlinks and txt files and a data yaml file.
Expand All @@ -2371,6 +2406,8 @@ def export_coco_as_yolov5_via_yml(yml_path: str, output_dir: str, train_split_ra
train split rate between 0 and 1. will be used when val_json_path is None.
numpy_seed: int
To fix the numpy seed.
disable_symlink: bool
If True, copy images instead of creating symlinks.
Returns:
yaml_path: str
Expand Down Expand Up @@ -2406,6 +2443,7 @@ def export_coco_as_yolov5_via_yml(yml_path: str, output_dir: str, train_split_ra
val_coco=val_coco,
train_split_rate=train_split_rate,
numpy_seed=numpy_seed,
disable_symlink=disable_symlink,
)

return yaml_path
7 changes: 7 additions & 0 deletions sahi/utils/file.py
Original file line number Diff line number Diff line change
Expand Up @@ -236,3 +236,10 @@ def download_from_url(from_url: str, to_path: str):
from_url,
to_path,
)


def is_colab():
    """
    Checks whether the current environment is a Google Colab instance.

    Returns:
        bool: True if the 'google.colab' module is present among the
            already-loaded modules, False otherwise.
    """
    # Imported locally so the check carries no module-level dependency.
    from sys import modules as loaded_modules

    return "google.colab" in loaded_modules

0 comments on commit 422b287

Please sign in to comment.