rename v1 to scenewise and v2 to imagewise
ylabbe committed May 22, 2023
1 parent 3c7dc08 commit dcf1807
Showing 6 changed files with 153 additions and 74 deletions.
@@ -1,3 +1,23 @@
"""
Tools to manipulate the bop-imagewise format.
bop-imagewise is a format in which the annotations of each image are stored
in individual files. It is only used as an intermediate step when converting
a bop-scenewise dataset to a bop-webdataset.
The layout is the following:
├─ dataset
│ ├─ KEY.{rgb|gray}.{png|jpg}
│ ├─ KEY.depth.png
│ ├─ KEY.camera.json
│ ├─ KEY.gt.json
│ ├─ KEY.gt_info.json
│ ├─ KEY.mask.json
│ ├─ KEY.mask_visib.json
...
where KEY is a unique identifier of an image in the dataset, typically {scene_id:06d}_{image_id:06d}.
"""

import json
import pathlib
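
For illustration (not part of the diff), a minimal sketch of how the KEY naming scheme above can be used to locate the files of one image; the dataset path and the ids are hypothetical:

import json
import pathlib

dataset_dir = pathlib.Path("ycbv/train_pbr_imagewise")  # hypothetical location
scene_id, image_id = 4, 15
key = f"{scene_id:06d}_{image_id:06d}"  # '000004_000015'

# Per-image annotations are small JSON files sharing the image key as prefix.
camera = json.loads((dataset_dir / f"{key}.camera.json").read_text())
gt = json.loads((dataset_dir / f"{key}.gt.json").read_text())

# The color image may be stored as rgb or gray, as png or jpg.
candidates = [
    dataset_dir / f"{key}.{modality}.{ext}"
    for modality in ("rgb", "gray")
    for ext in ("png", "jpg")
]
im_path = next((p for p in candidates if p.exists()), None)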

@@ -34,7 +54,7 @@ def save_scene_camera(
):
"""Saves scene_camera
(typically found in scene_camera.json
in the BOP-v1 format) to individual files.
in the BOP-scenewise format) to individual files.
:param scene_camera: scene_camera
dict mapping image_ids to camera information.
@@ -54,7 +74,7 @@ def save_scene_gt(
):
"""Saves scene ground truth
(typically found in scene_gt.json or
scene_gt_info.json in the BOP-v1 format) to individual files.
scene_gt_info.json in the BOP-scenewise format) to individual files.
:param scene_camera: scene_gt
dict mapping image_ids to gt information.
@@ -1,3 +1,19 @@
"""
Tools to manipulate the bop-scenewise format.
bop-scenewise is the standard format described in docs/datasets_format:
├─ dataset
│ ├─ SCENE_ID
│ │ ├─ scene_camera.json
│ │ ├─ scene_gt.json
│ │ ├─ scene_gt_info.json
│ │ ├─ depth
│ │ ├─ mask
│ │ ├─ mask_visib
│ │ ├─ rgb|gray
"""

import json
import pathlib
import re
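
For illustration (not part of the diff), a minimal sketch of walking a bop-scenewise dataset, assuming the usual BOP conventions of zero-padded scene directories, string image ids in the per-scene JSON files, and rgb images named {image_id:06d}.png; the split path is hypothetical:

import json
import pathlib

split_dir = pathlib.Path("ycbv/train_pbr")  # hypothetical split directory

for scene_dir in sorted(p for p in split_dir.iterdir() if p.is_dir()):
    # One JSON file per scene, each mapping an image_id to its annotations.
    scene_camera = json.loads((scene_dir / "scene_camera.json").read_text())
    scene_gt = json.loads((scene_dir / "scene_gt.json").read_text())
    for image_id, gt_instances in scene_gt.items():
        rgb_path = scene_dir / "rgb" / f"{int(image_id):06d}.png"
        print(rgb_path, len(gt_instances), scene_camera[image_id]["cam_K"])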
57 changes: 48 additions & 9 deletions bop_toolkit_lib/dataset/bop_webdataset.py
@@ -1,11 +1,50 @@
"""
Tools to manipulate the bop-webdataset format.
bop-webdataset is composed of several shards (.tar files), each containing
a maximum of 1000 images. Because images and annotations are stored in a .tar file,
they can be read sequentially to achieve faster reading speeds than the other
file formats.
├─ dataset
│ ├─ key_to_shard.json
│ ├─ shard-000000.tar
│ ├─ shard-000001.tar
│ ├─ ...
Each shard contains a chunk of the bop-imagewise format. The images are typically
stored after shuffling, so that reading the data sequentially still yields random
samples of the dataset. E.g.:
├─ shard-000000.tar
│ ├─ 00004_00015.rgb.jpg
│ ├─ 00004_00015.camera.json
│ ├─ 00004_00015.gt.json
│ ├─ 00004_00015.gt_info.json
│ ├─ 00004_00015.mask.json
│ ├─ 00004_00015.mask_visib.json
│ ├─ 00021_00777.rgb.jpg
│ ├─ 00021_00777.camera.json
│ ├─ 00021_00777.gt.json
│ ├─ 00021_00777.gt_info.json
│ ├─ 00021_00777.mask.json
│ ├─ 00021_00777.mask_visib.json
The file key_to_shard.json maps an image key to the index of the shard
where it is stored. This can be used to read an individual image directly from a
.tar file, but beware that this may be slow because random access in a .tar file
requires seeking the corresponding file in the entire byte sequence.
"""

import json
import io
import tarfile

import numpy as np

from bop_toolkit_lib import inout
from bop_toolkit_lib.dataset import bop_v2
from bop_toolkit_lib.dataset import bop_imagewise
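
For illustration (not part of the diff), a minimal sketch of the two access patterns described above using only the standard library; the paths and the image key are hypothetical, and the key_to_shard map is assumed to store integer shard indices:

import json
import pathlib
import tarfile

wds_dir = pathlib.Path("ycbv/train_pbr_web")  # hypothetical dataset directory

# Random access: look up the shard of one image key, then seek inside the .tar.
# This works but is slow, as noted above.
key_to_shard = json.loads((wds_dir / "key_to_shard.json").read_text())
key = "000004_000015"
shard_path = wds_dir / f"shard-{key_to_shard[key]:06d}.tar"
with tarfile.open(shard_path) as tar:
    gt = json.load(tar.extractfile(f"{key}.gt.json"))

# Sequential access (the intended fast path): stream every member of a shard.
with tarfile.open(wds_dir / "shard-000000.tar") as tar:
    for member in tar:
        payload = tar.extractfile(member).read()  # e.g. '000004_000015.rgb.jpg'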


def decode_sample(
@@ -56,22 +95,22 @@ def decode_sample(
image_data['im_depth'] = im_depth

if decode_gt:
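# sample['gt.json'] holds the raw bytes of this image's gt file as stored in the
# shard; io.BytesIO wraps them in a file-like object for the bop_imagewise loader.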
image_data['gt'] = bop_v2.io_load_gt(
image_data['gt'] = bop_imagewise.io_load_gt(
io.BytesIO(sample['gt.json']),
instance_ids=instance_ids)

if decode_gt_info:
image_data['gt_info'] = bop_v2.io_load_gt(
image_data['gt_info'] = bop_imagewise.io_load_gt(
io.BytesIO(sample['gt_info.json']),
instance_ids=instance_ids)

if decode_mask_visib:
image_data['mask_visib'] = bop_v2.io_load_masks(
image_data['mask_visib'] = bop_imagewise.io_load_masks(
io.BytesIO(sample['mask_visib.json']),
instance_ids=instance_ids)

if decode_mask:
image_data['mask'] = bop_v2.io_load_masks(
image_data['mask'] = bop_imagewise.io_load_masks(
io.BytesIO(sample['mask.json']),
instance_ids=instance_ids)

@@ -130,22 +169,22 @@ def _load(ext, read=True):
image_data['im_depth'] = im_depth

if load_gt:
image_data['gt'] = bop_v2.io_load_gt(
image_data['gt'] = bop_imagewise.io_load_gt(
_load('gt.json', read=False),
instance_ids=instance_ids)

if load_gt_info:
image_data['gt_info'] = bop_v2.io_load_gt(
image_data['gt_info'] = bop_imagewise.io_load_gt(
_load('gt_info.json', read=False),
instance_ids=instance_ids)

if load_mask_visib:
image_data['mask_visib'] = bop_v2.io_load_masks(
image_data['mask_visib'] = bop_imagewise.io_load_masks(
_load('mask_visib.json', read=False),
instance_ids=instance_ids)

if load_mask:
image_data['mask'] = bop_v2.io_load_masks(
image_data['mask'] = bop_imagewise.io_load_masks(
_load('mask.json', read=False),
instance_ids=instance_ids)

@@ -8,16 +8,16 @@
import numpy as np
import webdataset as wds

from bop_toolkit_lib.dataset import bop_v2
from bop_toolkit_lib.dataset import bop_imagewise


def parse_args():
parser = argparse.ArgumentParser(
prog="v2 -> webdataset converter utility",
prog="bop-imagewise -> bop-webdataset converter utility",
)
parser.add_argument(
"--input",
help="""A directory containing a dataset in v2 format,
help="""A directory containing a dataset in imagewise format,
e.g. ./ycbv/train_pbr_v2format.
""",
type=str,
@@ -73,8 +73,8 @@ def make_key_to_shard_map(
return key_to_shard


def convert_v2_to_webdataset(
v2_dir,
def convert_imagewise_to_webdataset(
input_dir,
wds_dir,
image_keys,
start_shard,
@@ -87,13 +87,13 @@ def convert_v2_to_webdataset(
maxcount=maxcount,
encoder=False
)
infos = bop_v2.load_image_infos(
v2_dir, image_keys[0])
infos = bop_imagewise.load_image_infos(
input_dir, image_keys[0])

for key in image_keys:

def _file_path(ext):
return v2_dir / f'{key}.{ext}'
return input_dir / f'{key}.{ext}'

obj = {
'__key__': key,
@@ -137,11 +137,11 @@ def _file_path(ext):
def main():
args = parse_args()

v2_dir = pathlib.Path(args.input)
input_dir = pathlib.Path(args.input)
wds_dir = pathlib.Path(args.output)

v2_file_paths = v2_dir.glob('*')
keys = set([p.name.split('.')[0] for p in v2_file_paths])
input_file_paths = input_dir.glob('*')
keys = set([p.name.split('.')[0] for p in input_file_paths])
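# File names follow the bop-imagewise convention KEY.<ext>, e.g. '000004_000015.rgb.png'
# and '000004_000015.gt.json', so taking the text before the first '.' yields one
# key per image.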
keys = list(keys)

if args.shuffle:
@@ -154,7 +154,7 @@ def main():
for keys_split in keys_splits:
_args.append(
(
v2_dir,
input_dir,
wds_dir,
keys_split,
start_shard,
@@ -165,12 +165,12 @@ def main():
start_shard += n_shards
with multiprocessing.Pool(processes=args.nprocs) as pool:
pool.starmap(
convert_v2_to_webdataset,
convert_imagewise_to_webdataset,
iterable=_args
)
else:
convert_v2_to_webdataset(
v2_dir,
convert_imagewise_to_webdataset(
input_dir,
wds_dir,
keys,
0,