diff --git a/ssd300_evaluation_COCO.ipynb b/ssd300_evaluation_COCO.ipynb
index ea5abd02..124553e3 100644
--- a/ssd300_evaluation_COCO.ipynb
+++ b/ssd300_evaluation_COCO.ipynb
@@ -18,7 +18,9 @@
 {
 "cell_type": "code",
 "execution_count": null,
- "metadata": {},
+ "metadata": {
+ "collapsed": true
+ },
 "outputs": [],
 "source": [
 "from keras import backend as K\n",
@@ -108,7 +110,6 @@
 " offsets=[0.5, 0.5, 0.5, 0.5, 0.5, 0.5],\n",
 " clip_boxes=False,\n",
 " variances=[0.1, 0.1, 0.2, 0.2],\n",
- " coords='centroids',\n",
 " normalize_coords=True,\n",
 " subtract_mean=[123, 117, 104],\n",
 " swap_channels=[2, 1, 0],\n",
@@ -186,7 +187,7 @@
 },
 "outputs": [],
 "source": [
- "dataset = DataGenerator(labels_output_format=['class_id', 'xmin', 'ymin', 'xmax', 'ymax'])\n",
+ "dataset = DataGenerator()\n",
 "\n",
 "# TODO: Set the paths to the dataset here.\n",
 "MS_COCO_dataset_images_dir = '../../datasets/MicrosoftCOCO/val2017/'\n",
@@ -254,7 +255,6 @@
 " confidence_thresh=0.01,\n",
 " iou_threshold=0.45,\n",
 " top_k=200,\n",
- " pred_coords='centroids',\n",
 " normalize_coords=True)"
 ]
 },
diff --git a/ssd300_evaluation_Pascal_VOC.ipynb b/ssd300_evaluation_Pascal_VOC.ipynb
index e3e77852..981570c7 100644
--- a/ssd300_evaluation_Pascal_VOC.ipynb
+++ b/ssd300_evaluation_Pascal_VOC.ipynb
@@ -14,7 +14,9 @@
 {
 "cell_type": "code",
 "execution_count": null,
- "metadata": {},
+ "metadata": {
+ "collapsed": true
+ },
 "outputs": [],
 "source": [
 "from keras import backend as K\n",
@@ -70,7 +72,9 @@
 {
 "cell_type": "code",
 "execution_count": 4,
- "metadata": {},
+ "metadata": {
+ "collapsed": true
+ },
 "outputs": [],
 "source": [
 "# 1: Build the Keras model\n",
@@ -93,7 +97,6 @@
 " offsets=[0.5, 0.5, 0.5, 0.5, 0.5, 0.5],\n",
 " clip_boxes=False,\n",
 " variances=[0.1, 0.1, 0.2, 0.2],\n",
- " coords='centroids',\n",
 " normalize_coords=True,\n",
 " subtract_mean=[123, 117, 104],\n",
 " swap_channels=[2, 1, 0],\n",
@@ -177,7 +180,7 @@
 }
 ],
 "source": [
- "dataset = DataGenerator(labels_output_format=['class_id', 'xmin', 'ymin', 'xmax', 'ymax'])\n",
+ "dataset = DataGenerator()\n",
 "\n",
 "# TODO: Set the paths to the dataset here.\n",
 "Pascal_VOC_dataset_images_dir = '../../datasets/VOCdevkit/VOC2007/JPEGImages/'\n",
@@ -253,7 +256,6 @@
 " confidence_thresh=0.01,\n",
 " iou_threshold=0.45,\n",
 " top_k=200,\n",
- " pred_coords='centroids',\n",
 " normalize_coords=True)"
 ]
 },
diff --git a/ssd300_inference.ipynb b/ssd300_inference.ipynb
index a6926b04..92fa6199 100644
--- a/ssd300_inference.ipynb
+++ b/ssd300_inference.ipynb
@@ -101,7 +101,6 @@
 " offsets=[0.5, 0.5, 0.5, 0.5, 0.5, 0.5],\n",
 " clip_boxes=False,\n",
 " variances=[0.1, 0.1, 0.2, 0.2],\n",
- " coords='centroids',\n",
 " normalize_coords=True,\n",
 " subtract_mean=[123, 117, 104],\n",
 " swap_channels=[2, 1, 0],\n",
@@ -324,7 +323,7 @@
 "source": [
 "# Create a `BatchGenerator` instance and parse the Pascal VOC labels.\n",
 "\n",
- "dataset = DataGenerator(labels_output_format=('class_id', 'xmin', 'ymin', 'xmax', 'ymax'))\n",
+ "dataset = DataGenerator()\n",
 "\n",
 "# TODO: Set the paths to the datasets here.\n",
 "\n",
diff --git a/ssd300_training.ipynb b/ssd300_training.ipynb
index 1c18556c..3492d2ce 100644
--- a/ssd300_training.ipynb
+++ b/ssd300_training.ipynb
@@ -96,7 +96,6 @@
 "offsets = [0.5, 0.5, 0.5, 0.5, 0.5, 0.5] # The offsets of the first anchor box center points from the top and left borders of the image as a fraction of the step size for each predictor layer.\n",
 "clip_boxes = False # Whether or not to clip ground truth and anchor boxes to lie entirely within the image boundaries\n",
 "variances = [0.1, 0.1, 0.2, 0.2] # The variances by which the encoded target coordinates are divided as in the original implementation\n",
- "coords = 'centroids' # Whether the box coordinates to be used as targets for the model should be in the 'centroids', 'corners', or 'minmax' format, see documentation\n",
 "normalize_coords = True"
 ]
 },
@@ -150,7 +149,6 @@
 " offsets=offsets,\n",
 " clip_boxes=clip_boxes,\n",
 " variances=variances,\n",
- " coords=coords,\n",
 " normalize_coords=normalize_coords,\n",
 " subtract_mean=mean_color,\n",
 " swap_channels=swap_channels)\n",
@@ -226,20 +224,6 @@
 "In order to train the model on a dataset other than Pascal VOC, either choose `DataGenerator`'s appropriate parser method that corresponds to your data format, or, if `DataGenerator` does not provide a suitable parser for your data format, you can write an additional parser and add it. Out of the box, `DataGenerator` can handle datasets that use the Pascal VOC format (use `parse_xml()`), the MS COCO format (use `parse_json()`) and a wide range of CSV formats (use `parse_csv()`)."
 ]
 },
- {
- "cell_type": "code",
- "execution_count": 11,
- "metadata": {
- "collapsed": true
- },
- "outputs": [],
- "source": [
- "# 1: Instantiate to `BatchGenerator` objects: One for training, one for validation.\n",
- "\n",
- "train_dataset = DataGenerator(labels_output_format=['class_id', 'xmin', 'ymin', 'xmax', 'ymax'])\n",
- "val_dataset = DataGenerator(labels_output_format=['class_id', 'xmin', 'ymin', 'xmax', 'ymax'])"
- ]
- },
 {
 "cell_type": "code",
 "execution_count": 12,
@@ -256,6 +240,11 @@
 }
 ],
 "source": [
+ "# 1: Instantiate to `BatchGenerator` objects: One for training, one for validation.\n",
+ "\n",
+ "train_dataset = DataGenerator()\n",
+ "val_dataset = DataGenerator()\n",
+ "\n",
 "# 2: Parse the image and label lists for the training and validation datasets. This can take a while.\n",
 "\n",
 "# TODO: Set the paths to the datasets here.\n",
@@ -360,7 +349,6 @@
 " variances=variances,\n",
 " pos_iou_threshold=0.5,\n",
 " neg_iou_limit=0.4,\n",
- " coords=coords,\n",
 " normalize_coords=normalize_coords)\n",
 "\n",
 "# 6: Create the generator handles that will be passed to Keras' `fit_generator()` function.\n",
@@ -578,7 +566,6 @@
 " confidence_thresh=0.5,\n",
 " iou_threshold=0.4,\n",
 " top_k=200,\n",
- " input_coords='centroids',\n",
 " normalize_coords=normalize_coords,\n",
 " img_height=img_height,\n",
 " img_width=img_width)"
diff --git a/ssd512_inference.ipynb b/ssd512_inference.ipynb
index c1375601..5035da83 100644
--- a/ssd512_inference.ipynb
+++ b/ssd512_inference.ipynb
@@ -102,7 +102,6 @@
 " offsets=[0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5],\n",
 " clip_boxes=False,\n",
 " variances=[0.1, 0.1, 0.2, 0.2],\n",
- " coords='centroids',\n",
 " normalize_coords=True,\n",
 " subtract_mean=[123, 117, 104],\n",
 " swap_channels=[2, 1, 0],\n",
@@ -325,7 +324,7 @@
 "source": [
 "# Create a `BatchGenerator` instance and parse the Pascal VOC labels.\n",
 "\n",
- "dataset = DataGenerator(labels_output_format=('class_id', 'xmin', 'ymin', 'xmax', 'ymax'))\n",
+ "dataset = DataGenerator()\n",
 "\n",
 "# TODO: Set the paths to the datasets here.\n",
 "\n",
diff --git a/ssd7_training.ipynb b/ssd7_training.ipynb
index 860997e3..0b80740a 100644
--- a/ssd7_training.ipynb
+++ b/ssd7_training.ipynb
@@ -71,7 +71,6 @@
 "* If `two_boxes_for_ar1 == True`, then each predictor layer will predict two boxes with aspect ratio one, one a bit smaller, the other one a bit larger.\n",
 "* If `clip_boxes == True`, then the ground truth and anchor boxes will be clipped so that they lie entirely within the image boundaries. Even though it may seem counterintuitive at first, it is recommended not to clip the boxes. According to Wei Liu, the model performs slightly better when the boxes are not clipped.\n",
 "* In the matching process during the training, the anchor box offsets are being divided by the variances. Leaving them at 1.0 for each of the four box coordinates means that they have no effect. Setting them to less than 1.0 spreads the imagined anchor box offset distribution for the respective box coordinate.\n",
- "* The `coords` argument lets you choose what coordinate format the model should learn internally. If you choose the 'centroids' format, the targets will be converted to the `(cx, cy, w, h)` coordinate format used in the original implementation. Note that the coordinate format that the model learns to predict and the coordinate format that the model outputs are not necessarily the same. For instance, the original SSD300 learns to predict `(cx, cy, w, h)` internally, but in the decoding stage, those predictions are converted to `(xmin, ymin, xmax, ymax)`, so that is what the model outputs at the end.\n",
 "* `normalize_coords` converts all coordinates from absolute coordinate to coordinates that are relative to the image height and width. This setting has no effect on the outcome of the training."
 ]
 },
@@ -96,7 +95,6 @@
 "offsets = None # In case you'd like to set the offsets for the anchor box grids manually; not recommended\n",
 "clip_boxes = False # Whether or not to clip the ground truth and anchor boxes to lie entirely within the image boundaries\n",
 "variances = [1.0, 1.0, 1.0, 1.0] # The list of variances by which the encoded target coordinates are scaled\n",
- "coords = 'centroids' # Whether the box coordinates to be used should be in the 'centroids' or 'minmax' format, see documentation\n",
 "normalize_coords = True # Whether or not the model is supposed to use coordinates relative to the image size"
 ]
 },
@@ -149,7 +147,6 @@
 " offsets=offsets,\n",
 " clip_boxes=clip_boxes,\n",
 " variances=variances,\n",
- " coords=coords,\n",
 " normalize_coords=normalize_coords,\n",
 " subtract_mean=intensity_mean,\n",
 " divide_by_stddev=intensity_range)\n",
@@ -226,20 +223,6 @@
 "The example setup below was used to train SSD7 on two road traffic datasets released by [Udacity](https://github.com/udacity/self-driving-car/tree/master/annotations) with around 20,000 images in total and 5 object classes (car, truck, pedestrian, bicyclist, traffic light), although the vast majority of the objects are cars. The original datasets have a constant image size of 1200x1920 RGB. I consolidated the two datasets, removed a few bad samples (although there are probably many more), and resized the images to 300x480 RGB, i.e. to one sixteenth of the original image size. In case you'd like to train a model on the same dataset, you can download the consolidated and resized dataset I used [here](https://drive.google.com/open?id=1uOqIUiJlDwoeL8vnNMacNbkDpDe1eRp-) (about 900 MB)."
 ]
 },
- {
- "cell_type": "code",
- "execution_count": 4,
- "metadata": {
- "collapsed": true
- },
- "outputs": [],
- "source": [
- "# 1: Instantiate to `BatchGenerator` objects: One for training, one for validation.\n",
- "\n",
- "train_dataset = DataGenerator(labels_output_format=['class_id', 'xmin', 'ymin', 'xmax', 'ymax'])\n",
- "val_dataset = DataGenerator(labels_output_format=['class_id', 'xmin', 'ymin', 'xmax', 'ymax'])"
- ]
- },
 {
 "cell_type": "code",
 "execution_count": 5,
@@ -255,6 +238,11 @@
 }
 ],
 "source": [
+ "# 1: Instantiate to `BatchGenerator` objects: One for training, one for validation.\n",
+ "\n",
+ "train_dataset = DataGenerator()\n",
+ "val_dataset = DataGenerator()\n",
+ "\n",
 "# 2: Parse the image and label lists for the training and validation datasets.\n",
 "\n",
 "# TODO: Set the paths to your dataset here.\n",
@@ -334,7 +322,6 @@
 " variances=variances,\n",
 " pos_iou_threshold=0.5,\n",
 " neg_iou_limit=0.3,\n",
- " coords=coords,\n",
 " normalize_coords=normalize_coords)\n",
 "\n",
 "# 6: Create the generator handles that will be passed to Keras' `fit_generator()` function.\n",
@@ -410,6 +397,7 @@
 "cell_type": "code",
 "execution_count": null,
 "metadata": {
+ "collapsed": true,
 "scrolled": true
 },
 "outputs": [],
@@ -593,8 +581,7 @@
 " confidence_thresh=0.5,\n",
 " iou_threshold=0.45,\n",
 " top_k=200,\n",
- " input_coords='centroids',\n",
- " normalize_coords=True,\n",
+ " normalize_coords=normalize_coords,\n",
 " img_height=img_height,\n",
 " img_width=img_width)\n",
 "\n",
diff --git a/weight_sampling_tutorial.ipynb b/weight_sampling_tutorial.ipynb
index 9dd9df51..7db1808e 100644
--- a/weight_sampling_tutorial.ipynb
+++ b/weight_sampling_tutorial.ipynb
@@ -472,7 +472,6 @@
 "offsets = [0.5, 0.5, 0.5, 0.5, 0.5, 0.5] # The offsets of the first anchor box center points from the top and left borders of the image as a fraction of the step size for each predictor layer.\n",
 "clip_boxes = False # Whether or not you want to limit the anchor boxes to lie entirely within the image boundaries\n",
 "variances = [0.1, 0.1, 0.2, 0.2] # The variances by which the encoded target coordinates are scaled as in the original implementation\n",
- "coords = 'centroids' # Whether the box coordinates to be used as targets for the model should be in the 'centroids', 'corners', or 'minmax' format, see documentation\n",
 "normalize_coords = True"
 ]
 },
@@ -508,7 +507,6 @@
 " offsets=offsets,\n",
 " clip_boxes=clip_boxes,\n",
 " variances=variances,\n",
- " coords=coords,\n",
 " normalize_coords=normalize_coords,\n",
 " subtract_mean=subtract_mean,\n",
 " divide_by_stddev=None,\n",
@@ -562,7 +560,7 @@
 }
 ],
 "source": [
- "dataset = DataGenerator(labels_output_format=['class_id', 'xmin', 'ymin', 'xmax', 'ymax'])\n",
+ "dataset = DataGenerator()\n",
 "\n",
 "# TODO: Set the paths to your dataset here.\n",
 "images_path = '../../datasets/Udacity_Driving/driving_dataset_consolidated_small/'\n",
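Note (not part of the diff above): the net effect of these hunks on the notebook code is that the data generators and the box encoder/decoder calls no longer take any explicit coordinate-format or label-format arguments. A minimal sketch of the resulting call sites follows; the import path is an assumption based on the repository layout rather than something shown in the hunks, and the stated default label order is simply what the removed `labels_output_format` argument spelled out.

# Sketch only -- the import path below is assumed, not shown in the diff.
from data_generator.object_detection_2d_data_generator import DataGenerator

# `labels_output_format` is no longer passed; the generator presumably falls back to
# its default label order ('class_id', 'xmin', 'ymin', 'xmax', 'ymax'), i.e. exactly
# the value the removed argument used to pass explicitly.
train_dataset = DataGenerator()
val_dataset = DataGenerator()

# The removed `coords`, `pred_coords`, and `input_coords` arguments all carried the
# value 'centroids', so dropping them implies that 'centroids' is now the fixed
# internal box-coordinate format for the model, encoder, and decoder calls.

Whether the underlying classes still accept these arguments is not visible from this diff; the hunks only touch the notebook call sites.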