diff --git a/dataset/data_reader.py b/dataset/data_reader.py
new file mode 100644
index 0000000..6f782ff
--- /dev/null
+++ b/dataset/data_reader.py
@@ -0,0 +1,259 @@
+import os
+import glob
+import numpy as np
+import tensorflow as tf
+import functools
+from utils.dataset_util import (
+    rotate, random_hue, random_contrast, random_brightness,
+    random_flip_left_right, random_crop, resize,
+    keypoints_select, normalize_keypoints, normalize_bboxes)
+from dataset.seg_data import PNGobjects
+
+
+slim_example_decoder = tf.contrib.slim.tfexample_decoder
+
+
+class SegDataReader(object):
+    def __init__(self, data_cfg):
+        self.data_cfg = data_cfg
+        self.datasets = []
+        # referenced by the keypoint decoder below; assumed to be part
+        # of the data config
+        self.num_keypoints = data_cfg.num_keypoints
+
+ for dataset in data_cfg.datasets:
+ data_dir = dataset['data_dir']
+ name = dataset['name']
+ weight = dataset['weight']
+ img_files = os.path.join(
+ data_dir, dataset['img_glob_string'])
+            # assuming a 'mask_glob_string' key analogous to
+            # 'img_glob_string'
+            mask_files = dataset.get('mask_glob_string')
+            if mask_files is not None:
+                mask_files = os.path.join(data_dir, mask_files)
+            tfrecord_file = dataset['tfrecord_file']
+            if (tfrecord_file is None) or dataset['overwrite_tfrecord']:
+                tfrecord_name = os.path.basename(data_dir) + '.records'
+                sub_dir = os.path.dirname(tfrecord_file) if tfrecord_file else ''
+                tfrecord_path = os.path.join(data_dir, sub_dir, tfrecord_name)
+                tfrecord_dir = os.path.dirname(tfrecord_path)
+                if not os.path.exists(tfrecord_dir):
+                    os.makedirs(tfrecord_dir)
+                ds = self.add_dataset(name, img_files, mask_files)
+                ds.create_tf_record(tfrecord_path)
+            else:
+                tfrecord_path = os.path.join(data_dir, tfrecord_file)
+            self.datasets.append({'name': name,
+                                  'tfrecord_path': tfrecord_path,
+                                  'weight': weight})
+
+ def add_dataset(self, name, img_files, mask_files):
+        if name == 'png_objects':
+            # PNGobjects is sketched at the end of dataset/seg_data.py
+            ds = PNGobjects(self.data_cfg, img_files, mask_files)
+ else:
+ raise RuntimeError('Dataset not supported')
+ return ds
+
+ def _get_probs(self):
+ probs = [ds['weight'] for ds in self.datasets]
+ probs = np.array(probs)
+ return probs / np.sum(probs)
+
+ @staticmethod
+ def _get_tensor(tensor):
+ if isinstance(tensor, tf.SparseTensor):
+ return tf.sparse_tensor_to_dense(tensor)
+ return tensor
+
+ @staticmethod
+ def _image_decoder(keys_to_tensors):
+ filename = keys_to_tensors['image/filename']
+ image_string = tf.read_file(filename)
+ # TODO: decode after crop to increase speed
+ image_decoded = tf.image.decode_jpeg(image_string, channels=3)
+ return image_decoded
+
+ @staticmethod
+ def _mask_decoder(keys_to_tensors):
+ mask_x = SegDataReader._get_tensor(
+ keys_to_tensors['image/mask/x'])
+ mask_y = SegDataReader._get_tensor(
+ keys_to_tensors['image/mask/y'])
+ shape = SegDataReader._get_tensor(
+ keys_to_tensors['image/shape'])
+
+ indices = tf.stack([mask_x, mask_y], axis=1)
+ values = tf.zeros_like(mask_x)
+ mask = tf.SparseTensor(indices=indices, values=values,
+ dense_shape=shape)
+        # TODO: possibly do sparse to dense conversion after crop
+ mask = tf.sparse_tensor_to_dense(mask, default_value=1)
+ return tf.cast(mask, tf.int32)
+
+ @staticmethod
+ def _keypoints_decoder(keys_to_tensor, num_keypoints=15):
+ keypoints = keys_to_tensor['image/person/keypoints']
+ img_shape = keys_to_tensor['image/shape']
+        num_instances = SegDataReader._get_tensor(
+            keys_to_tensor['image/num_instances'])
+ shape = [num_instances] + [num_keypoints, 3]
+ if isinstance(keypoints, tf.SparseTensor):
+ keypoints = tf.sparse_reshape(keypoints, shape=shape)
+ keypoints = tf.sparse_tensor_to_dense(keypoints)
+ else:
+ keypoints = tf.reshape(keypoints, shape=shape)
+ keypoints = normalize_keypoints(keypoints, img_shape)
+ return keypoints
+
+ @staticmethod
+ def _bbox_decoder(keys_to_tensor):
+ bbox = keys_to_tensor['image/person/bbox']
+ img_shape = keys_to_tensor['image/shape']
+        num_instances = SegDataReader._get_tensor(
+            keys_to_tensor['image/num_instances'])
+ shape = [num_instances] + [4]
+ if isinstance(bbox, tf.SparseTensor):
+ bbox = tf.sparse_reshape(bbox, shape=shape)
+ bbox = tf.sparse_tensor_to_dense(bbox)
+ else:
+ bbox = tf.reshape(bbox, shape=shape)
+ bbox = normalize_bboxes(bbox, img_shape)
+ return bbox
+
+ def _decoder(self):
+ keys_to_features = {
+ 'image/filename':
+ tf.FixedLenFeature((), tf.string, default_value=''),
+ 'image/shape':
+ tf.FixedLenFeature([2], tf.int64),
+ 'image/num_instances':
+ tf.FixedLenFeature((), tf.int64),
+ 'image/person/bbox':
+ tf.VarLenFeature(tf.float32),
+ 'image/person/keypoints':
+ tf.VarLenFeature(tf.float32),
+ 'image/mask/x':
+ tf.VarLenFeature(tf.int64),
+ 'image/mask/y':
+ tf.VarLenFeature(tf.int64)
+ }
+ keypoints_decoder = functools.partial(
+ self._keypoints_decoder, num_keypoints=self.num_keypoints)
+ items_to_handlers = {
+ 'image': slim_example_decoder.ItemHandlerCallback(
+ 'image/filename', self._image_decoder),
+ 'mask': slim_example_decoder.ItemHandlerCallback(
+ ['image/mask/x', 'image/mask/y', 'image/shape'],
+ self._mask_decoder),
+ 'keypoints': slim_example_decoder.ItemHandlerCallback(
+ ['image/person/keypoints', 'image/num_instances',
+ 'image/shape'], keypoints_decoder),
+ 'bbox': slim_example_decoder.ItemHandlerCallback(
+ ['image/person/bbox', 'image/num_instances',
+ 'image/shape'], self._bbox_decoder)
+ }
+ decoder = slim_example_decoder.TFExampleDecoder(keys_to_features,
+ items_to_handlers)
+ return decoder
+
+ def augment_data(self, dataset, train_cfg):
+ aug_cfg = train_cfg.augmentation
+ preprocess_cfg = train_cfg.preprocess
+ img_size = preprocess_cfg['image_resize']
+ if aug_cfg['flip_left_right']:
+ kp_dict = {kp_name: i for i, kp_name in
+ enumerate(train_cfg.train_keypoints)}
+ flipped_kp_indices = []
+ for kp_name in train_cfg.train_keypoints:
+ if kp_name.startswith('left'):
+ flipped_kp_name = 'right' + kp_name.split('left')[1]
+ flipped_kp_indices.append(kp_dict[flipped_kp_name])
+ elif kp_name.startswith('right'):
+ flipped_kp_name = 'left' + kp_name.split('right')[1]
+ flipped_kp_indices.append(kp_dict[flipped_kp_name])
+ else:
+ flipped_kp_indices.append(kp_dict[kp_name])
+ random_flip_left_right_fn = functools.partial(
+ random_flip_left_right,
+ flipped_keypoint_indices=flipped_kp_indices)
+ dataset = dataset.map(
+ random_flip_left_right_fn,
+ num_parallel_calls=train_cfg.num_parallel_map_calls
+ )
+ dataset = dataset.prefetch(train_cfg.prefetch_size)
+ random_crop_fn = functools.partial(
+ random_crop,
+ crop_size=img_size,
+ scale_range=aug_cfg['scale_range']
+ )
+ if aug_cfg['random_crop']:
+ dataset = dataset.map(
+ random_crop_fn,
+ num_parallel_calls=train_cfg.num_parallel_map_calls
+ )
+ dataset = dataset.prefetch(train_cfg.prefetch_size)
+        if aug_cfg['random_brightness']:
+            # brightness/contrast only touch the image, so wrap them to
+            # pass the other tuple elements through unchanged
+            dataset = dataset.map(
+                lambda img, kp, bbox, mask:
+                (random_brightness(img), kp, bbox, mask),
+                num_parallel_calls=train_cfg.num_parallel_map_calls
+            )
+            dataset = dataset.prefetch(train_cfg.prefetch_size)
+        if aug_cfg['random_contrast']:
+            dataset = dataset.map(
+                lambda img, kp, bbox, mask:
+                (random_contrast(img), kp, bbox, mask),
+                num_parallel_calls=train_cfg.num_parallel_map_calls
+            )
+            dataset = dataset.prefetch(train_cfg.prefetch_size)
+ return dataset
+
+ def preprocess_data(self, dataset, train_cfg):
+ preprocess_cfg = train_cfg.preprocess
+ img_size = preprocess_cfg['image_resize']
+ resize_fn = functools.partial(
+ resize,
+ target_image_size=img_size)
+ dataset = dataset.map(
+ resize_fn,
+ num_parallel_calls=train_cfg.num_parallel_map_calls
+ )
+        dataset = dataset.prefetch(train_cfg.prefetch_size)
+ return dataset
+
+ def read_data(self, train_config):
+ probs = self._get_probs()
+ probs = tf.cast(probs, tf.float32)
+ decoder = self._decoder()
+ filenames = [ds['tfrecord_path'] for ds in self.datasets]
+ file_ids = list(range(len(filenames)))
+ dataset = tf.data.Dataset.from_tensor_slices((file_ids, filenames))
+ dataset = dataset.apply(tf.contrib.data.rejection_resample(
+ class_func=lambda c, _: c,
+ target_dist=probs,
+ seed=42))
+ dataset = dataset.map(lambda _, a: a[1])
+ if train_config.shuffle:
+ dataset = dataset.shuffle(
+ train_config.filenames_shuffle_buffer_size)
+
+ dataset = dataset.repeat(train_config.num_epochs or None)
+
+ file_read_func = functools.partial(tf.data.TFRecordDataset,
+ buffer_size=8 * 1000 * 1000)
+ dataset = dataset.apply(
+ tf.contrib.data.parallel_interleave(
+ file_read_func, cycle_length=train_config.num_readers,
+ block_length=train_config.read_block_length, sloppy=True))
+ if train_config.shuffle:
+ dataset = dataset.shuffle(train_config.shuffle_buffer_size)
+
+ decode_fn = functools.partial(
+ decoder.decode, items=['image', 'keypoints', 'bbox', 'mask'])
+ dataset = dataset.map(
+ decode_fn, num_parallel_calls=train_config.num_parallel_map_calls)
+ dataset = dataset.prefetch(train_config.prefetch_size)
+
+ train_keypoints = [self.data_cfg.keypoints[kp_name]
+ for kp_name in train_config.train_keypoints]
+ kp_subset_fn = functools.partial(
+ keypoints_select, keypoints_to_keep=train_keypoints)
+ dataset = dataset.map(
+ kp_subset_fn,
+ num_parallel_calls=train_config.num_parallel_map_calls)
+ dataset = dataset.prefetch(train_config.prefetch_size)
+
+ dataset = self.augment_data(dataset, train_config)
+
+ dataset = self.preprocess_data(dataset, train_config)
+ return dataset
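+
+
+# Example (sketch) of wiring the reader into a training script;
+# `data_cfg` and `train_cfg` are assumed to come from the project's
+# config files:
+#
+#     from utils.tfrecord_util import make_initializable_iterator
+#     reader = SegDataReader(data_cfg)
+#     dataset = reader.read_data(train_cfg)
+#     iterator = make_initializable_iterator(dataset)
+#     image, keypoints, bbox, mask = iterator.get_next()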
diff --git a/dataset/seg_data.py b/dataset/seg_data.py
new file mode 100644
index 0000000..47c63d7
--- /dev/null
+++ b/dataset/seg_data.py
@@ -0,0 +1,72 @@
+import time
+import numpy as np
+import os
+from abc import abstractmethod
+from tqdm import tqdm
+from utils import tfrecord_util
+import tensorflow as tf
+
+
+class SegData(object):
+    def __init__(self, cfg, img_files, mask_files=None):
+        """
+        Constructor of the SegData class
+        """
+        self.cfg = cfg
+        self.imgs, self.ids = None, None
+        # the dataset root is assumed to be supplied through the config
+        self.data_dir = cfg.data_dir
+        self.product_labels = {}
+        print('loading annotations into memory...')
+        tic = time.time()
+        self.datasets = []
+        # each training file lists `<image> <annotation>` pairs
+        train_files = img_files
+        if not isinstance(train_files, list):
+            train_files = [train_files]
+        for train_file in train_files:
+            labels_file = os.path.dirname(train_file)
+            labels_file = os.path.join(labels_file, 'labels.txt')
+            with open(labels_file, 'r') as f:
+ for line in f:
+ label, prod_name = line.split()
+ self.product_labels[prod_name] = int(label)
+ with open(train_file, 'r') as f:
+ dataset = {}
+ for line in f:
+ img, ann_file = line.split()
+ dataset[img] = ann_file
+ self.datasets.append(dataset)
+ print('Done (t={:0.2f}s)'.format(time.time() - tic))
+ self.create_index()
+
+ @abstractmethod
+ def create_index(self):
+ return
+
+ def get_size(self):
+ return len(self.ids)
+
+ def _create_tf_example(self, img_id):
+ img_meta = self.imgs[img_id]
+ img_file = img_meta['filename']
+ img_file = os.path.join(self.data_dir, img_file)
+        img_shape = list(img_meta['shape'])
+        # the mask path is assumed to be stored in the image metadata;
+        # fall back to the image path when it is absent
+        mask_file = img_meta.get('mask_filename', img_file)
+
+        feature_dict = {
+            'image/filename':
+                tfrecord_util.bytes_feature(img_file.encode('utf8')),
+            'image/shape':
+                tfrecord_util.int64_list_feature(img_shape),
+            'mask/filename':
+                tfrecord_util.bytes_feature(mask_file.encode('utf8'))
+        }
+ return tf.train.Example(features=tf.train.Features(feature=feature_dict))
+
+ def create_tf_record(self, out_path, shuffle=True):
+ print("Creating tf records : ", out_path)
+ writer = tf.python_io.TFRecordWriter(out_path)
+ if shuffle:
+ np.random.shuffle(self.ids)
+ for img_id in tqdm(self.ids):
+ tf_example = self._create_tf_example(img_id)
+ writer.write(tf_example.SerializeToString())
+ writer.close()
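+
+
+class PNGobjects(SegData):
+    """Minimal sketch of the PNG-mask dataset class referenced by
+    SegDataReader.add_dataset; the real implementation is not part of
+    this diff, so the indexing below is an assumption."""
+
+    def create_index(self):
+        # index images by filename using the `<image> <annotation>`
+        # pairs collected in __init__; the image shape is assumed to be
+        # fixed and supplied by the config
+        self.imgs = {}
+        for dataset in self.datasets:
+            for img_file, ann_file in dataset.items():
+                self.imgs[img_file] = {'filename': img_file,
+                                       'mask_filename': ann_file,
+                                       'shape': self.cfg.input_shape}
+        self.ids = list(self.imgs.keys())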
diff --git a/fcn_gcn_net.py b/fcn_gcn_net.py
deleted file mode 100644
index 9ab3188..0000000
--- a/fcn_gcn_net.py
+++ /dev/null
@@ -1,57 +0,0 @@
-import tensorflow as tf
-from layers_fcn_gcn import conv_module, global_conv_module, boundary_refine, deconv_module
-
-
-def fcn_gcn_net(input_, num_classes, k_gcn, training, init_channels=8, n_layers=7, batch_norm=True):
- """Based on https://arxiv.org/abs/1703.02719 but using VGG style base
- Args:
- input_ (4-D Tensor): (N, H, W, C)
- num_classes (integer) : Number of classes
- k_gcn (int) : Kernel size for global conv layer
- training (bool): If True, run in training mode
- init_channels (int) : Number of channels in the first conv layer
- n_layers (int) : Number of times to downsample/upsample
- batch_norm (bool): if True, use batch-norm
- Returns:
- output (4-D Tensor): (N, H, W, n)
- Logits classifying each pixel as either 'car' (1) or 'not car' (0)
- """
- # color-space adjustment
- net = tf.layers.conv2d(input_, 3, (1, 1), name="color_space_adjust")
- n = n_layers
-
- # encoder
- feed = net
- ch = init_channels
- conv_blocks = []
- for i in range(n-1):
- conv, feed = conv_module(feed, ch, training, name=str(i + 1), batch_norm=batch_norm)
- conv_blocks.append(conv)
- ch *= 2
- last_conv = conv_module(feed, ch, training, name=str(n), pool=False, batch_norm=batch_norm)
- conv_blocks.append(last_conv)
-
- # global convolution network
- global_conv_blocks = []
- for i in range(n):
- global_conv_blocks.append(global_conv_module(conv_blocks[i], num_classes, training,
- k = k_gcn, name=str(i + 1)))
-
- # boundary refinement
- br_blocks = []
- for i in range(n):
- br_blocks.append(boundary_refine(global_conv_blocks[i], training, name=str(i + 1),
- batch_norm=batch_norm))
-
- # decoder / upsampling
- up_blocks = []
- last_br = br_blocks[-1]
- for i in range(n-1, 0, -1):
- deconv = deconv_module(last_br, name=str(i+1), stride=2, kernel_size=4)
- up = tf.add(deconv, br_blocks[i - 1])
- last_br = boundary_refine(up, training, name='up_' + str(i))
- up_blocks.append(up)
-
- logits = last_br
- return logits
-
diff --git a/layers_fcn_gcn.py b/layers/layers_fcn_gcn.py
similarity index 100%
rename from layers_fcn_gcn.py
rename to layers/layers_fcn_gcn.py
diff --git a/layers_unet.py b/layers/layers_unet.py
similarity index 100%
rename from layers_unet.py
rename to layers/layers_unet.py
diff --git a/model.py b/model.py
deleted file mode 100644
index 703cbe5..0000000
--- a/model.py
+++ /dev/null
@@ -1,255 +0,0 @@
-import tensorflow as tf
-from u_net import unet
-from fcn_gcn_net import fcn_gcn_net
-from losses import pixel_wise_loss, dice_coef_loss, mask_prediction
-from utils import dice_coef, rle_encode, rle_to_string
-from tqdm import tqdm
-from utils import ImageLoader
-import os
-import sys
-import cv2
-import numpy as np
-from scipy.misc import imresize
-import pandas as pd
-
-
-
-class Model(object):
- def __init__(self, params, cfg):
- self.params = params
- self.mode = params.phase
- self.is_train = (self.mode == 'train')
- self.batch_size = params.batch_size
- self.stage = params.stage
-
- self.cfg = {}
- self.cfg = cfg
- self.image_loader = ImageLoader(cfg)
-
- self.tf_placeholders = {}
- self.create_tf_placeholders()
-
- self.global_step = tf.Variable(0, name='global_step', trainable=False)
- self.train_op, self.loss_op = None, None
- self.eval_op = None
- self.mask_logits = self.build_net()
-
- def create_tf_placeholders(self):
- roi_h, roi_w = self.cfg['scaled_img_shape'] #self.cfg['roi_shape']
- roi_images = tf.placeholder(tf.float32, [self.batch_size, roi_h, roi_w, 3])
- roi_masks = tf.placeholder(tf.float32, [self.batch_size, roi_h, roi_w, 2])
- roi_weights = tf.placeholder(tf.float32, [self.batch_size, roi_h, roi_w])
- self.tf_placeholders = {'images': roi_images,
- 'masks': roi_masks,
- 'weights': roi_weights}
-
- def build_net(self):
- batch_norm = self.params.batch_norm
- # tf.reset_default_graph()
- roi_images = self.tf_placeholders["images"]
- net = self.params.net
- if net == 'unet':
- mask_logits = unet(roi_images, num_classes=2, training=self.is_train,
- init_channels=8, n_layers=6, batch_norm=batch_norm)
- else:
- mask_logits = fcn_gcn_net(roi_images, num_classes=2, k_gcn=11, training=self.is_train,
- init_channels=8, n_layers=7, batch_norm=True)
- return mask_logits
-
- def make_train_op(self):
- learning_rate = self.params.learning_rate
- roi_masks = self.tf_placeholders["masks"]
- roi_masks_pos = tf.slice(roi_masks, [0, 0, 0, 1], [-1, -1, -1, 1])
- roi_masks_pos = tf.squeeze(roi_masks_pos, [-1])
- roi_weights = self.tf_placeholders["weights"]
- _, tf_mask = mask_prediction(self.mask_logits)
- loss0 = dice_coef_loss(roi_masks_pos, tf_mask)
- loss1 = pixel_wise_loss(self.mask_logits, roi_masks, pixel_weights=roi_weights)
- loss = loss0 + self.params.sce_weight * loss1
- solver = tf.train.AdamOptimizer(learning_rate, epsilon=1e-8)
-
- update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
- with tf.control_dependencies(update_ops):
- self.train_op = solver.minimize(loss, global_step=self.global_step)
- self.loss_op = [loss0, loss1]
-
- def make_eval_op(self):
- pred_probs, pred_masks = mask_prediction(self.mask_logits)
- self.eval_op = [pred_probs, pred_masks]
-
- def get_feed_dict(self, batch, perturb=True):
- if self.stage == 1:
- roi_images, roi_masks, roi_weights = \
- self.image_loader.load_img_batch(batch, edge_factor=self.params.edge_factor)
- else:
- roi_images, roi_masks, roi_weights = \
- self.image_loader.load_roi_batch(batch, perturb=perturb,
- edge_factor=self.params.edge_factor)
- tf_roi_images = self.tf_placeholders["images"]
- if roi_masks is None:
- return {tf_roi_images: roi_images}
- tf_roi_masks = self.tf_placeholders["masks"]
- tf_roi_weights = self.tf_placeholders["weights"]
- return {tf_roi_images: roi_images,
- tf_roi_masks: roi_masks,
- tf_roi_weights: roi_weights}
-
- def train(self, data):
- """ Train the model. """
- params = self.params
- save_dir = os.path.join(params.save_dir, str(params.set).zfill(2), 'stage_'+str(self.stage))
- if not os.path.exists(save_dir):
- os.makedirs(save_dir)
- save_dir = os.path.join(save_dir, 'model')
- self.make_train_op()
-
- with tf.Session() as sess:
- sess.run(tf.global_variables_initializer())
- saver = tf.train.Saver()
- if params.load:
- self.load(sess, saver)
-
- n_display = params.display_period
- for i_epoch in tqdm(list(range(params.num_epochs)), desc='epoch'):
- dice_loss, sce_loss, n_steps = 0, 0, 0
- for _ in tqdm(list(range(0, data.count, self.batch_size)), desc='batch'):
- batch = data.next_batch()
- if len(batch[0]) < self.batch_size:
- continue
- ops = [self.train_op, self.global_step] + self.loss_op
- feed_dict = self.get_feed_dict(batch, perturb=True)
- _, global_step, loss0, loss1 = sess.run(ops, feed_dict=feed_dict)
- if n_steps + 1 == n_display:
- print("Dice coeff : {}, Cross entropy loss : {}"
- .format(-dice_loss/n_steps, sce_loss/n_steps))
- dice_loss, sce_loss, n_steps = 0, 0, 0
- else:
- dice_loss += loss0
- sce_loss += loss1
- n_steps += 1
-
- if (global_step + 1) % params.save_period == 0:
- print("Saving model in {}".format(save_dir))
- saver.save(sess, save_dir, global_step)
- data.reset()
- print("{} epochs finished.".format(i_epoch))
-
- def validate(self, data):
- """ Test the model. """
- # params = self.params
- self.make_eval_op()
-
- with tf.Session() as sess:
- sess.run(tf.global_variables_initializer())
- saver = tf.train.Saver()
- self.load(sess, saver)
- for _ in tqdm(list(range(data.count)), desc='batch'):
- batch = data.next_batch()
- img_file, mask_file = batch[0][0], batch[1][0]
-
- gt_bbox = self.image_loader.generate_rois([mask_file], perturb=False)[0]
- feed_dict = self.get_feed_dict(batch, perturb=False)
- pred_probs, _ = sess.run(self.eval_op, feed_dict=feed_dict)
- # pred_mask = np.zeros_like(pred_probs, dtype=np.uint8)
- # pred_mask[np.where(pred_mask > 0.5)] = 1
- # print(np.where(pred_mask > 0.5))
- mask_pred = pred_probs[0, :, :, 1]
- mask_pred[mask_pred > 0.5] = 1
- mask_pred[mask_pred <= 0.5] = 0
-
- if True:
- img = cv2.imread(img_file)
- real_mask = np.zeros_like(img, dtype=np.uint8)
- if self.stage == 1:
- img_h, img_w = self.cfg['image_shape']
- l, r, t, b = self.cfg['crops']
- pred_mask = imresize(mask_pred, (img_h - t - b, img_w - l - r)) / 255
- real_mask[t: img_h - b, l: img_w - r, 0] = np.uint8(np.round(pred_mask))
- else:
- y, x, h, w = gt_bbox
- pred_mask = cv2.resize(mask_pred, (w, h))
- real_mask[y:y + h, x:x + w, 0] = np.uint8(pred_mask)
-
-
- winname = 'Image %s' % (img_file)
- img = cv2.resize(img, (1438, 960))
- img_mask = cv2.resize(real_mask * 255, (1438, 960), interpolation=cv2.INTER_CUBIC)
- display_img = cv2.addWeighted(img, 0.2, img_mask, 0.8, 0)
- cv2.imshow(winname, display_img)
- cv2.moveWindow(winname, 100, 100)
- cv2.waitKey(1000)
-
- gt_mask = self.image_loader.load_mask(mask_file)
- print("Dice coefficient : ", dice_coef(gt_mask, real_mask[:,:,0]))
-
- def test(self, data):
- """ Test the model. """
- params = self.params
- self.make_eval_op()
-
- res_dir = params.test_results_dir
- res_dir = os.path.join(res_dir, str(params.set).zfill(2),
- 'stage_' + str(params.stage))
- if not os.path.exists(res_dir):
- os.makedirs(res_dir)
- img_names = []
- rle_strings = []
-
- with tf.Session() as sess:
- sess.run(tf.global_variables_initializer())
- saver = tf.train.Saver()
- self.load(sess, saver)
- for _ in tqdm(list(range(data.count)), desc='batch'):
- batch = data.next_batch()
- img_file = batch[0][0]
- feed_dict = self.get_feed_dict(batch, perturb=False)
- pred_probs, _ = sess.run(self.eval_op, feed_dict=feed_dict)
- # pred_mask = np.zeros_like(pred_probs, dtype=np.uint8)
- # pred_mask[np.where(pred_mask > 0.5)] = 1
- # print(np.where(pred_mask > 0.5))
- mask_pred = pred_probs[0, :, :]
- #mask_pred[mask_pred > 0.5] = 1
- #mask_pred[mask_pred <= 0.5] = 0
- real_mask = self.image_loader.postprocess(mask_pred)
- rle = rle_encode(real_mask)
- rle_strings.append(rle_to_string(rle))
-
- if 1:
- img = cv2.imread(img_file)
- img_mask = np.zeros_like(img)
- img_mask[:, :, 0] = real_mask * 255
- # y, x, h, w = gt_bbox
- # print(gt_bbox)
-
- winname = 'Image %s' % (img_file)
- img = cv2.resize(img, (1438, 960))
-
-
- img_mask = cv2.resize(img_mask, (1438, 960))
- display_img = cv2.addWeighted(img, 0.4, img_mask, 0.6, 0)
- cv2.imshow(winname, display_img)
- cv2.moveWindow(winname, 100, 100)
- cv2.waitKey(1000)
-
- img_name = os.path.basename(img_file)
- img_names.append(img_name)
- #outfile = os.path.join(res_dir, str(img_name) + '.npy')
- #np.save(outfile, mask_pred)
- df = {'img' : img_names, 'rle_mask' : rle_strings}
- df = pd.DataFrame(df)
- outfile = os.path.join(res_dir, 'results.csv')
- df.to_csv(outfile)
-
-
-
- def load(self, sess, saver):
- """ Load the trained model. """
- params = self.params
- print("Loading model...")
- load_dir = os.path.join(params.save_dir, str(params.set).zfill(2),
- 'stage_'+str(params.stage), 'model')
- checkpoint = tf.train.get_checkpoint_state(os.path.dirname(load_dir))
- if checkpoint is None:
- print("Error: No saved model found. Please train first.")
- sys.exit(0)
- saver.restore(sess, checkpoint.model_checkpoint_path)
\ No newline at end of file
diff --git a/models/base_model.py b/models/base_model.py
new file mode 100644
index 0000000..3c116ff
--- /dev/null
+++ b/models/base_model.py
@@ -0,0 +1,244 @@
+from models.losses import pixel_wise_loss
+from abc import abstractmethod
+
+
+class SegModel(object):
+ def __init__(self, model_cfg):
+ self.cfg = model_cfg
+
+ @abstractmethod
+ def preprocess(self, inputs):
+ """Image preprocessing"""
+ raise NotImplementedError("Not yet implemented")
+
+ @abstractmethod
+ def build_net(self, preprocessed_inputs, is_training=False):
+ """Builds network and returns heatmaps and fpn features"""
+ raise NotImplementedError("Not yet implemented")
+
+ def predict(self, inputs, is_training=False):
+ images = inputs['images']
+ preprocessed_inputs = self.preprocess(images)
+ mask_logits = self.build_net(
+ preprocessed_inputs, is_training=is_training)
+ prediction = {'mask_logits': mask_logits}
+ return prediction
+
+ def losses(self, prediction, ground_truth):
+ mask_logits = prediction['mask_logits']
+ masks_gt = ground_truth['masks']
+ weights_gt = None
+ if self.cfg.use_weights:
+ weights_gt = ground_truth['weights']
+ loss = pixel_wise_loss(mask_logits, masks_gt, pixel_weights=weights_gt)
+ losses = {'CE_loss': loss}
+ return losses
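+
+
+# Example (sketch) of how a concrete SegModel is driven at train time;
+# `model_cfg` and `batch` are assumed to come from the training script:
+#
+#     from models.u_net import UNet
+#     model = UNet(model_cfg)
+#     prediction = model.predict({'images': batch['images']},
+#                                is_training=True)
+#     losses = model.losses(prediction, {'masks': batch['masks'],
+#                                        'weights': batch.get('weights')})
+#     total_loss = losses['CE_loss']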
diff --git a/models/fcn_gcn_net.py b/models/fcn_gcn_net.py
new file mode 100644
index 0000000..a923fbe
--- /dev/null
+++ b/models/fcn_gcn_net.py
@@ -0,0 +1,73 @@
+import tensorflow as tf
+from layers.layers_fcn_gcn import (
+ conv_module, global_conv_module, boundary_refine, deconv_module)
+from models.base_model import SegModel
+
+
+class FCNGCNnet(SegModel):
+ def __init__(self, cfg):
+ super().__init__(cfg)
+
+ def preprocess(self, inputs):
+ """Image preprocessing"""
+ h, w = self.cfg.input_shape
+ inputs = tf.reshape(inputs, [-1, h, w, 3])
+ return 2.0 * tf.to_float(inputs) / 255. - 1.0
+
+ def build_net(self, input_, is_training=False):
+ """Based on https://arxiv.org/abs/1703.02719 but using VGG style base
+ Args:
+ input_ (4-D Tensor): (N, H, W, C)
+ is_training (bool): If True, run in training mode
+ Returns:
+ output (4-D Tensor): (N, H, W, n)
+                Logits classifying each pixel into one of num_classes classes
+ """
+ num_classes = self.cfg.num_classes # Number of classes
+ k_gcn = self.cfg.k_gcn # Kernel size for global conv layer
+ init_channels = self.cfg.init_channels # Number of channels in the first conv layer
+ n_layers = self.cfg.n_layers # Number of times to downsample/upsample
+ batch_norm = self.cfg.batch_norm # if True, use batch-norm
+
+ # color-space adjustment
+ net = tf.layers.conv2d(input_, 3, (1, 1), name="color_space_adjust")
+ n = n_layers
+
+ # encoder
+ feed = net
+ ch = init_channels
+ conv_blocks = []
+ for i in range(n-1):
+ conv, feed = conv_module(feed, ch, is_training, name=str(i + 1),
+ batch_norm=batch_norm)
+ conv_blocks.append(conv)
+ ch *= 2
+ last_conv = conv_module(feed, ch, is_training, name=str(n), pool=False,
+ batch_norm=batch_norm)
+ conv_blocks.append(last_conv)
+
+ # global convolution network
+ global_conv_blocks = []
+ for i in range(n):
+ global_conv_blocks.append(
+ global_conv_module(conv_blocks[i], num_classes, is_training,
+ k=k_gcn, name=str(i + 1)))
+
+ # boundary refinement
+ br_blocks = []
+ for i in range(n):
+ br_blocks.append(boundary_refine(global_conv_blocks[i], is_training,
+ name=str(i + 1), batch_norm=batch_norm))
+
+ # decoder / upsampling
+ up_blocks = []
+ last_br = br_blocks[-1]
+ for i in range(n-1, 0, -1):
+ deconv = deconv_module(last_br, name=str(i+1), stride=2, kernel_size=4)
+ up = tf.add(deconv, br_blocks[i - 1])
+ last_br = boundary_refine(up, is_training, name='up_' + str(i))
+ up_blocks.append(up)
+
+ logits = last_br
+ return logits
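+
+# For reference, the deleted model.py invoked this architecture with
+# num_classes=2, k_gcn=11, init_channels=8, n_layers=7 and
+# batch_norm=True; a config sketch (field names assumed):
+#
+#     model_cfg.num_classes = 2
+#     model_cfg.k_gcn = 11
+#     model_cfg.init_channels = 8
+#     model_cfg.n_layers = 7
+#     model_cfg.batch_norm = True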
+
diff --git a/losses.py b/models/losses.py
similarity index 95%
rename from losses.py
rename to models/losses.py
index 45ae910..cccc160 100644
--- a/losses.py
+++ b/models/losses.py
@@ -1,7 +1,7 @@
 import tensorflow as tf
 
 
-def dice_coef(y_true, y_pred, axis=None, smooth = 0.001):
+def dice_coef(y_true, y_pred, axis=None, smooth=0.001):
     if axis is None:
         axis=[1,2]
     y_true_f = tf.cast(y_true, dtype=tf.float32)
@@ -50,4 +50,4 @@ def mask_prediction(pixel_logits):
     masks = tf.reshape(masks, [n.value, h.value, w.value])
     probs = tf.slice(probs, [0, 0, 0, 1], [-1, -1, -1, 1])
     probs = tf.squeeze(probs, axis=-1)
- return probs, masks
\ No newline at end of file
+ return probs, masks
diff --git a/models/u_net.py b/models/u_net.py
new file mode 100644
index 0000000..92f0bda
--- /dev/null
+++ b/models/u_net.py
@@ -0,0 +1,57 @@
+import tensorflow as tf
+from layers.layers_unet import conv_module, upsample
+from models.base_model import SegModel
+
+
+class UNet(SegModel):
+ def __init__(self, cfg):
+ super().__init__(cfg)
+
+ def preprocess(self, inputs):
+ """Image preprocessing"""
+ h, w = self.cfg.input_shape
+ inputs = tf.reshape(inputs, [-1, h, w, 3])
+ return 2.0 * tf.to_float(inputs) / 255. - 1.0
+
+ def build_net(self, input_, is_training=False):
+ """Based on https://arxiv.org/abs/1505.04597
+ Args:
+ input_ (4-D Tensor): (N, H, W, C)
+ is_training (bool): If True, run in training mode
+ Returns:
+ output (4-D Tensor): (N, H, W, n)
+                Logits classifying each pixel into one of num_classes classes
+ """
+ num_classes = self.cfg.num_classes # Number of classes
+ n_layers = self.cfg.n_layers # Number of times to downsample/upsample
+ init_channels = self.cfg.init_channels # Number of channels in the first conv layer
+ batch_norm = self.cfg.batch_norm # if True, use batch-norm
+
+ # color-space adjustment
+ net = tf.layers.conv2d(input_, 3, (1, 1), name="color_space_adjust")
+
+ # encoder
+ feed = net
+ ch = init_channels
+ conv_blocks = []
+ for i in range(n_layers):
+ conv, feed = conv_module(feed, ch, is_training, name='down_{}'.format(i + 1),
+ batch_norm=batch_norm)
+ conv_blocks.append(conv)
+ ch *= 2
+ last_conv = conv_module(feed, ch, is_training, name='down_{}'.format(n_layers+1),
+ pool=False, batch_norm=batch_norm)
+ conv_blocks.append(last_conv)
+
+ # decoder / upsampling
+ feed = conv_blocks[-1]
+ for i in range(n_layers, 0, -1):
+            ch //= 2  # floor-divide keeps the channel count an integer
+ up = upsample(feed, name=str(i+1))
+ concat = tf.concat([up, conv_blocks[i-1]], axis=-1, name="concat_{}".format(i))
+ feed = conv_module(concat, ch, is_training, name='up_{}'.format(i), batch_norm=batch_norm,
+ pool=False)
+
+ logits = tf.layers.conv2d(feed, num_classes, (1, 1), name='logits', activation=None, padding='same')
+ return logits
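+
+# For reference, the deleted model.py invoked this architecture with
+# num_classes=2, init_channels=8 and n_layers=6; a config sketch
+# (field names assumed):
+#
+#     model_cfg.num_classes = 2
+#     model_cfg.init_channels = 8
+#     model_cfg.n_layers = 6
+#     model_cfg.batch_norm = True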
+
diff --git a/u_net.py b/u_net.py
deleted file mode 100644
index 5dc62f1..0000000
--- a/u_net.py
+++ /dev/null
@@ -1,44 +0,0 @@
-import tensorflow as tf
-from layers_unet import conv_module, upsample
-
-
-def unet(input_, num_classes, training, init_channels=8, n_layers=6, batch_norm=True):
- """Based on https://arxiv.org/abs/1505.04597
- Args:
- input_ (4-D Tensor): (N, H, W, C)
- num_classes (int) : Number of classes
- n_layers (int) : Number of times to downsample/upsample
- training (bool): If True, run in training mode
- init_channels (int) : Number of channels in the first conv layer
- batch_norm (bool): if True, use batch-norm
- Returns:
- output (4-D Tensor): (N, H, W, n)
- Logits classifying each pixel as either 'car' (1) or 'not car' (0)
- """
- # color-space adjustment
- net = tf.layers.conv2d(input_, 3, (1, 1), name="color_space_adjust")
-
- # encoder
- feed = net
- ch = init_channels
- conv_blocks = []
- for i in range(n_layers):
- conv, feed = conv_module(feed, ch, training, name='down_{}'.format(i + 1), batch_norm=batch_norm)
- conv_blocks.append(conv)
- ch *= 2
- last_conv = conv_module(feed, ch, training, name='down_{}'.format(n_layers+1),
- pool=False, batch_norm=batch_norm)
- conv_blocks.append(last_conv)
-
- # decoder / upsampling
- feed = conv_blocks[-1]
- for i in range(n_layers, 0, -1):
- ch /= 2
- up = upsample(feed, name=str(i+1))
- concat = tf.concat([up, conv_blocks[i-1]], axis=-1, name="concat_{}".format(i))
- feed = conv_module(concat, ch, training, name='up_{}'.format(i), batch_norm=batch_norm,
- pool=False)
-
- logits = tf.layers.conv2d(feed, num_classes, (1, 1), name='logits', activation=None, padding='same')
- return logits
-
diff --git a/utils/dataset_util.py b/utils/dataset_util.py
new file mode 100644
index 0000000..6f3b305
--- /dev/null
+++ b/utils/dataset_util.py
@@ -0,0 +1,162 @@
+import tensorflow as tf
+import numpy as np
+import cv2
+import matplotlib.pyplot as plt
+
+
+def random_int(maxval, minval=0):
+ return tf.random_uniform(
+ shape=[], minval=minval, maxval=maxval, dtype=tf.int32)
+
+
+def rotate(image, k):
+ # k = np.random.randint(0, 4)
+ if k > 0:
+ image = tf.image.rot90(image, k)
+ return image
+
+
+def flip_left_right(img):
+    # deterministic flip; a random, tuple-aware variant is sketched as
+    # random_flip_left_right at the end of this module
+    return tf.image.flip_left_right(img)
+
+
+def random_brightness(image):
+ image = tf.image.random_brightness(
+ image,
+ max_delta=0.1)
+ return image
+
+
+def random_contrast(image):
+ image = tf.image.random_contrast(
+ image,
+ lower=0.9,
+ upper=1.1)
+ return image
+
+
+def random_hue(image):
+ image = tf.image.random_hue(
+ image,
+ max_delta=0.1)
+ return image
+
+
+def resize(image, keypoints, bbox, mask,
+           target_image_size=(224, 224),
+           target_mask_size=None):
+    # restored from the commented-out draft so that
+    # SegDataReader.preprocess_data can call it; keypoints and boxes
+    # are normalized, so resizing leaves them unchanged
+    img_size = list(target_image_size)
+    if target_mask_size is None:
+        target_mask_size = img_size
+    mask_size = list(target_mask_size)
+    new_image = tf.image.resize_images(image, size=img_size)
+    new_mask = tf.expand_dims(mask, axis=2)
+    new_mask.set_shape([None, None, 1])
+    new_mask = tf.image.resize_images(new_mask, size=mask_size)
+    new_mask = tf.squeeze(new_mask)
+    return new_image, keypoints, bbox, new_mask
+
+###################################################
+# Some other potentially useful functions
+
+def rle_encode(mask_image):
+ pixels = mask_image.flatten()
+ # We avoid issues with '1' at the start or end (at the corners of
+ # the original image) by setting those pixels to '0' explicitly.
+ # We do not expect these to be non-zero for an accurate mask,
+ # so this should not harm the score.
+ pixels[0] = 0
+ pixels[-1] = 0
+ runs = np.where(pixels[1:] != pixels[:-1])[0] + 2
+ runs[1::2] = runs[1::2] - runs[:-1:2]
+ return runs
+
+
+def rle_to_string(runs):
+ return ' '.join(str(x) for x in runs)
+
+
+def dice_coef(y_true, y_pred, smooth=0.001):
+ y_true = np.array(y_true).flatten()
+ y_pred = np.array(y_pred).flatten()
+ intersection = np.sum(y_true * y_pred)
+ dice = (2. * intersection + smooth) / (np.sum(y_true) + np.sum(y_pred) + smooth)
+ return np.mean(dice)
+
+
+def get_bbox(mask_file):
+ binary_img = plt.imread(mask_file)
+ if binary_img.ndim > 2:
+ binary_img = binary_img[:, :, 0] // 255
+ ymin, xmin = np.min(np.nonzero(binary_img), axis=1)
+ ymax, xmax = np.max(np.nonzero(binary_img), axis=1)
+ return [ymin - 1, xmin - 1, ymax - ymin + 2, xmax - xmin + 2]
+
+
+def fix_aspect_ratio(cfg, rois):
+ h_roi, w_roi = cfg['roi_shape']
+ roi_aspect = w_roi / h_roi
+
+ aspect_rois = rois[:, 3] / rois[:, 2]
+ idx_a = np.where(aspect_rois > roi_aspect)[0]
+ idx_b = np.where(aspect_rois < roi_aspect)[0]
+
+ rois_a = rois[idx_a, :]
+ desired_h = rois_a[:, 3] / roi_aspect
+ delta_h = (desired_h - rois_a[:, 2]) / 2
+ rois_a[:, 0] = rois_a[:, 0] - delta_h
+ rois_a[:, 2] = desired_h
+
+ rois_b = rois[idx_b, :]
+ desired_w = rois_b[:, 2] * roi_aspect
+ delta_w = (desired_w - rois_b[:, 3]) / 2
+ rois_b[:, 1] = rois_b[:, 1] - delta_w
+ rois_b[:, 3] = desired_w
+
+ rois[idx_a, :] = rois_a
+ rois[idx_b, :] = rois_b
+
+ return rois
+
+
+def filter_mask(mask):
+ kernel = np.ones((2, 2))
+ mask_smooth = cv2.morphologyEx(mask, cv2.MORPH_CLOSE, kernel)
+ mask_smooth = np.uint8(np.round(mask_smooth))
+ blobs = cv2.connectedComponentsWithStats(mask_smooth, 4, cv2.CV_32S)
+ stats = blobs[2]
+ obj_label = None
+ for i, stat in enumerate(stats):
+ if stat[4] < 10000:
+ continue
+ elif (stat[0] < 2) and (stat[1] < 2):
+ continue
+ else:
+ obj_label = i
+ break
+ blobs = blobs[1]
+ blobs[blobs != obj_label] = 0
+ return np.uint8(blobs)
+
+
+
+
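+###################################################
+# The functions below are referenced by dataset/data_reader.py but were
+# missing from this module; these are minimal sketches of the assumed
+# behaviour (coordinates normalized to [0, 1]), not the original code.
+
+def normalize_keypoints(keypoints, img_shape):
+    # scale (y, x) pixel coordinates into [0, 1]; visibility is untouched
+    img_shape = tf.cast(img_shape, tf.float32)
+    scale = tf.stack([img_shape[0], img_shape[1], 1.])
+    return tf.cast(keypoints, tf.float32) / scale
+
+
+def normalize_bboxes(bbox, img_shape):
+    # scale (ymin, xmin, ymax, xmax) pixel boxes into [0, 1]
+    img_shape = tf.cast(img_shape, tf.float32)
+    scale = tf.stack([img_shape[0], img_shape[1],
+                      img_shape[0], img_shape[1]])
+    return tf.cast(bbox, tf.float32) / scale
+
+
+def keypoints_select(image, keypoints, bbox, mask, keypoints_to_keep=None):
+    # keep only the keypoint indices used for training
+    keypoints = tf.gather(keypoints, keypoints_to_keep, axis=1)
+    return image, keypoints, bbox, mask
+
+
+def random_flip_left_right(image, keypoints, bbox, mask,
+                           flipped_keypoint_indices=None):
+    # flip image, mask, keypoints and boxes together with probability 0.5
+    do_flip = tf.less(tf.random_uniform([]), 0.5)
+
+    def _flip():
+        flipped_image = tf.image.flip_left_right(image)
+        flipped_mask = tf.squeeze(tf.image.flip_left_right(
+            tf.expand_dims(mask, axis=2)), axis=2)
+        # mirror normalized x-coordinates, then swap left/right keypoints
+        y, x, v = tf.unstack(keypoints, axis=2)
+        flipped_kp = tf.stack([y, 1. - x, v], axis=2)
+        flipped_kp = tf.gather(flipped_kp, flipped_keypoint_indices, axis=1)
+        ymin, xmin, ymax, xmax = tf.unstack(bbox, axis=1)
+        flipped_bbox = tf.stack(
+            [ymin, 1. - xmax, ymax, 1. - xmin], axis=1)
+        return flipped_image, flipped_kp, flipped_bbox, flipped_mask
+
+    return tf.cond(do_flip, _flip,
+                   lambda: (image, keypoints, bbox, mask))
+
+
+def random_crop(image, keypoints, bbox, mask, crop_size=(224, 224),
+                scale_range=(1.0, 1.5)):
+    # sample a zoom factor and a crop window, crop image and mask, and
+    # remap the (normalized) keypoints/boxes; clipping of coordinates
+    # that fall outside the window is omitted in this sketch
+    scale = tf.random_uniform([], scale_range[0], scale_range[1])
+    frac = 1. / scale  # crop window size as a fraction of the image
+    oy = tf.random_uniform([], 0., 1. - frac)
+    ox = tf.random_uniform([], 0., 1. - frac)
+
+    shape = tf.shape(image)
+    img_h = tf.cast(shape[0], tf.float32)
+    img_w = tf.cast(shape[1], tf.float32)
+    y0 = tf.cast(oy * img_h, tf.int32)
+    x0 = tf.cast(ox * img_w, tf.int32)
+    ch = tf.cast(frac * img_h, tf.int32)
+    cw = tf.cast(frac * img_w, tf.int32)
+
+    image = tf.image.crop_to_bounding_box(image, y0, x0, ch, cw)
+    image = tf.image.resize_images(image, list(crop_size))
+    mask = tf.expand_dims(mask, axis=2)
+    mask = tf.image.crop_to_bounding_box(mask, y0, x0, ch, cw)
+    mask = tf.squeeze(tf.image.resize_images(
+        mask, list(crop_size),
+        method=tf.image.ResizeMethod.NEAREST_NEIGHBOR), axis=2)
+
+    y, x, v = tf.unstack(keypoints, axis=2)
+    keypoints = tf.stack([(y - oy) / frac, (x - ox) / frac, v], axis=2)
+    ymin, xmin, ymax, xmax = tf.unstack(bbox, axis=1)
+    bbox = tf.stack([(ymin - oy) / frac, (xmin - ox) / frac,
+                     (ymax - oy) / frac, (xmax - ox) / frac], axis=1)
+    return image, keypoints, bbox, mask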
diff --git a/utils.py b/utils/depricated.py
similarity index 71%
rename from utils.py
rename to utils/depricated.py
index 1f68f09..75ba963 100644
--- a/utils.py
+++ b/utils/depricated.py
@@ -4,86 +4,6 @@
 from scipy.misc import imresize
-def rle_encode(mask_image):
- pixels = mask_image.flatten()
- # We avoid issues with '1' at the start or end (at the corners of
- # the original image) by setting those pixels to '0' explicitly.
- # We do not expect these to be non-zero for an accurate mask,
- # so this should not harm the score.
- pixels[0] = 0
- pixels[-1] = 0
- runs = np.where(pixels[1:] != pixels[:-1])[0] + 2
- runs[1::2] = runs[1::2] - runs[:-1:2]
- return runs
-
-
-def rle_to_string(runs):
- return ' '.join(str(x) for x in runs)
-
-
-def dice_coef(y_true, y_pred, smooth=0.001):
- y_true = np.array(y_true).flatten()
- y_pred = np.array(y_pred).flatten()
- intersection = np.sum(y_true * y_pred)
- dice = (2. * intersection + smooth) / (np.sum(y_true) + np.sum(y_pred) + smooth)
- return np.mean(dice)
-
-
-def get_bbox(mask_file):
- binary_img = plt.imread(mask_file)
- if binary_img.ndim > 2:
- binary_img = binary_img[:, :, 0] // 255
- ymin, xmin = np.min(np.nonzero(binary_img), axis=1)
- ymax, xmax = np.max(np.nonzero(binary_img), axis=1)
- return [ymin - 1, xmin - 1, ymax - ymin + 2, xmax - xmin + 2]
-
-
-def fix_aspect_ratio(cfg, rois):
- h_roi, w_roi = cfg['roi_shape']
- roi_aspect = w_roi / h_roi
-
- aspect_rois = rois[:, 3] / rois[:, 2]
- idx_a = np.where(aspect_rois > roi_aspect)[0]
- idx_b = np.where(aspect_rois < roi_aspect)[0]
-
- rois_a = rois[idx_a, :]
- desired_h = rois_a[:, 3] / roi_aspect
- delta_h = (desired_h - rois_a[:, 2]) / 2
- rois_a[:, 0] = rois_a[:, 0] - delta_h
- rois_a[:, 2] = desired_h
-
- rois_b = rois[idx_b, :]
- desired_w = rois_b[:, 2] * roi_aspect
- delta_w = (desired_w - rois_b[:, 3]) / 2
- rois_b[:, 1] = rois_b[:, 1] - delta_w
- rois_b[:, 3] = desired_w
-
- rois[idx_a, :] = rois_a
- rois[idx_b, :] = rois_b
-
- return rois
-
-
-def filter_mask(mask):
- kernel = np.ones((2, 2))
- mask_smooth = cv2.morphologyEx(mask, cv2.MORPH_CLOSE, kernel)
- mask_smooth = np.uint8(np.round(mask_smooth))
- blobs = cv2.connectedComponentsWithStats(mask_smooth, 4, cv2.CV_32S)
- stats = blobs[2]
- obj_label = None
- for i, stat in enumerate(stats):
- if stat[4] < 10000:
- continue
- elif (stat[0] < 2) and (stat[1] < 2):
- continue
- else:
- obj_label = i
- break
- blobs = blobs[1]
- blobs[blobs != obj_label] = 0
- return np.uint8(blobs)
-
-
 class ImageLoader(object):
     def __init__(self, cfg):
diff --git a/utils/tfrecord_util.py b/utils/tfrecord_util.py
new file mode 100644
index 0000000..689ad87
--- /dev/null
+++ b/utils/tfrecord_util.py
@@ -0,0 +1,128 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+"""Utility functions for creating TFRecord data sets."""
+
+import tensorflow as tf
+
+
+def int64_feature(value):
+ return tf.train.Feature(int64_list=tf.train.Int64List(value=[value]))
+
+
+def int64_list_feature(value):
+ return tf.train.Feature(int64_list=tf.train.Int64List(value=value))
+
+
+def bytes_feature(value):
+ return tf.train.Feature(bytes_list=tf.train.BytesList(value=[value]))
+
+
+def bytes_list_feature(value):
+ return tf.train.Feature(bytes_list=tf.train.BytesList(value=value))
+
+
+def float_list_feature(value):
+ return tf.train.Feature(float_list=tf.train.FloatList(value=value))
+
+
+def read_examples_list(path):
+ """Read list of training or validation examples.
+ The file is assumed to contain a single example per line where the first
+ token in the line is an identifier that allows us to find the image and
+ annotation xml for that example.
+ For example, the line:
+ xyz 3
+ would allow us to find files xyz.jpg and xyz.xml (the 3 would be ignored).
+ Args:
+ path: absolute path to examples list file.
+ Returns:
+ list of example identifiers (strings).
+ """
+ with tf.gfile.GFile(path) as fid:
+ lines = fid.readlines()
+ return [line.strip().split(' ')[0] for line in lines]
+
+
+def recursive_parse_xml_to_dict(xml):
+ """Recursively parses XML contents to python dict.
+ We assume that `object` tags are the only ones that can appear
+ multiple times at the same level of a tree.
+ Args:
+ xml: xml tree obtained by parsing XML file contents using lxml.etree
+ Returns:
+ Python dictionary holding XML contents.
+ """
+ if not xml:
+ return {xml.tag: xml.text}
+ result = {}
+ for child in xml:
+ child_result = recursive_parse_xml_to_dict(child)
+ if child.tag != 'object':
+ result[child.tag] = child_result[child.tag]
+ else:
+ if child.tag not in result:
+ result[child.tag] = []
+ result[child.tag].append(child_result[child.tag])
+ return {xml.tag: result}
+
+
+def make_initializable_iterator(dataset):
+ """Creates an iterator, and initializes tables.
+ This is useful in cases where make_one_shot_iterator wouldn't work because
+ the graph contains a hash table that needs to be initialized.
+ Args:
+ dataset: A `tf.data.Dataset` object.
+ Returns:
+ A `tf.data.Iterator`.
+ """
+ iterator = dataset.make_initializable_iterator()
+ tf.add_to_collection(tf.GraphKeys.TABLE_INITIALIZERS, iterator.initializer)
+ return iterator
+
+
+def read_dataset(file_read_func, decode_func, input_files, config):
+ """Reads a dataset, and handles repetition and shuffling.
+ Args:
+ file_read_func: Function to use in tf.data.Dataset.interleave, to read
+ every individual file into a tf.data.Dataset.
+ decode_func: Function to apply to all records.
+ input_files: A list of file paths to read.
+ config: A input_reader_builder.InputReader object.
+ Returns:
+ A tf.data.Dataset based on config.
+ """
+ # Shard, shuffle, and read files.
+ filenames = tf.concat([tf.matching_files(pattern) for pattern in input_files],
+ 0)
+ filename_dataset = tf.data.Dataset.from_tensor_slices(filenames)
+ if config.shuffle:
+ filename_dataset = filename_dataset.shuffle(
+ config.filenames_shuffle_buffer_size)
+ elif config.num_readers > 1:
+ tf.logging.warning('`shuffle` is false, but the input data stream is '
+ 'still slightly shuffled since `num_readers` > 1.')
+
+ filename_dataset = filename_dataset.repeat(config.num_epochs or None)
+
+ records_dataset = filename_dataset.apply(
+ tf.contrib.data.parallel_interleave(
+ file_read_func, cycle_length=config.num_readers,
+ block_length=config.read_block_length, sloppy=True))
+ if config.shuffle:
+        records_dataset = records_dataset.shuffle(config.shuffle_buffer_size)
+ tensor_dataset = records_dataset.map(
+ decode_func, num_parallel_calls=config.num_parallel_map_calls)
+ return tensor_dataset.prefetch(config.prefetch_size)