@@ -10,29 +10,22 @@
 from sklearn import metrics
 
 
-def init_w(shape, name, reuse=False):
-    if reuse:
-        return tf.get_variable(name, shape, initializer=tf.contrib.layers.xavier_initializer())
-    init_method = tf.truncated_normal
-    init_params = {"stddev": math.sqrt(2 / sum(shape))}
-    w = tf.Variable(init_method(shape, **init_params), name=name)
-    return w
+def init_w(shape, name):
+    return tf.get_variable(name, shape, initializer=tf.contrib.layers.xavier_initializer())
 
 
-def init_b(shape, name, reuse=False):
-    if reuse:
-        return tf.get_variable(name, shape, initializer=tf.zeros_initializer())
-    return tf.Variable(np.zeros(shape, dtype=np.float32), name=name)
+def init_b(shape, name):
+    return tf.get_variable(name, shape, initializer=tf.zeros_initializer())
 
 
-def fully_connected_linear(net, shape, appendix, bias=True, reuse=False):
+def fully_connected_linear(net, shape, appendix, pruner=None, cursor=None):
     with tf.name_scope("Linear{}".format(appendix)):
         w_name = "W{}".format(appendix)
-        w = init_w(shape, w_name, reuse)
-        if bias:
-            b = init_b(shape[1], "b{}".format(appendix), reuse)
-            return tf.add(tf.matmul(net, w), b, name="Linear{}".format(appendix))
-        return tf.matmul(net, w, name="Linear{}_without_bias".format(appendix))
+        w = init_w(shape, w_name)
+        if pruner is not None:
+            w = pruner.prune_w(*pruner.get_w_info(w), cursor)
+        b = init_b(shape[1], "b{}".format(appendix))
+        return tf.add(tf.matmul(net, w), b, name="Linear{}".format(appendix))
 
 
 def prepare_tensorboard_verbose(sess):
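Note: fully_connected_linear now only assumes the pruner object exposes
get_w_info and prune_w, invoked as pruner.prune_w(*pruner.get_w_info(w), cursor);
the Pruner class itself is not part of this diff. A minimal sketch of an object
satisfying that interface (the class name, return value, and magnitude-based
masking are illustrative assumptions, not the repository's implementation):

    import tensorflow as tf

    class DummyPruner:
        """Hypothetical pruner: zeroes out small-magnitude weights."""

        def get_w_info(self, w):
            # Return the leading arguments prune_w expects; here, just the variable.
            return (w,)

        def prune_w(self, w, cursor=None):
            # cursor lets callers share per-tree pruning state; this sketch
            # ignores it and applies a fixed magnitude threshold.
            mask = tf.cast(tf.abs(w) >= 1e-2, w.dtype)
            return w * mask
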
@@ -385,6 +378,7 @@ def __init__(self, sign, snapshot_ratio, level=3, history_ratio=3, tolerance_rat
         self._running_sum = self._running_square_sum = self._running_best = self.running_epoch = None
         self._is_best = self._over_fit_performance = self._best_checkpoint_performance = None
         self._descend_counter = self._flat_counter = self._over_fitting_flag = None
+        self._descend_increment = self.n_history * extension / 30
 
     @property
     def rs(self):
@@ -442,6 +436,9 @@ def start_new_run(self):
         self.reset_monitors()
         return self
 
+    def punish_extension(self):
+        self._descend_counter += self._descend_increment
+
     def check(self, new_score):
         scores = self._scores
         scores.append(new_score * self.sign)
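Note: punish_extension, together with the _descend_increment added above, lets
the training loop penalize runs that keep requesting epoch extensions: every
call bumps _descend_counter by n_history * extension / 30, pushing the monitor
toward its early-stopping threshold faster. A hypothetical call site (the
surrounding loop and the extended flag are assumptions, not code from this
commit):

    # after the monitor grants another round of training epochs
    if extended:
        monitor.punish_extension()
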
@@ -551,92 +548,85 @@ def check(self, new_score):
 
 
 class DNDF:
-    def __init__(self, n_class=None, n_tree=16, tree_depth=4, reuse=False):
+    def __init__(self, n_class, n_tree=16, tree_depth=4):
         self.n_class = n_class
         self.n_tree, self.tree_depth = n_tree, tree_depth
         self.n_leaf = 2 ** (tree_depth + 1)
-        self.reuse = reuse
+        self.n_internals = self.n_leaf - 1
 
-    def __call__(self, net, n_batch_placeholder, dtype="output"):
-        if dtype != "feature" and self.n_class is None:
-            raise ValueError("dtype={} is not available when n_class is not provided".format(dtype))
+    def __call__(self, net, n_batch_placeholder, dtype="output", pruner=None, reuse_pruner=False):
         name = "DNDF_{}".format(dtype)
-        n_leaf = 2 ** (self.tree_depth + 1)
-        with tf.name_scope(name):
-            flat_decisions = self.build_tree_projection(dtype, net)
-            routes = self.build_routes(flat_decisions, n_batch_placeholder)
-            features = tf.concat(routes, 1, name="concat")
+        with tf.variable_scope(name, reuse=tf.AUTO_REUSE):
+            flat_probabilities = self.build_tree_projection(dtype, net, pruner, reuse_pruner)
+            routes = self.build_routes(flat_probabilities, n_batch_placeholder)
+            features = tf.concat(routes, 1, name="Feature_Concat")
             if dtype == "feature":
                 return features
-            local_leafs = self.build_leafs(self.n_tree, n_leaf)
-            if self.n_tree == 1:
-                final_prob_vectors = tf.matmul(routes[0], local_leafs[0])
-            else:
-                final_prob_vectors = tf.reduce_mean(
-                    [tf.matmul(route, leaf) for route, leaf in zip(routes, local_leafs)],
-                    0, name=name
-                )
-            return final_prob_vectors
-
-    def init_prob_w(self, shape, minval, maxval, name="prob_w"):
-        if self.reuse:
-            return tf.get_variable(name, shape, initializer=tf.random_uniform_initializer(minval, maxval))
-        return tf.Variable(tf.random_uniform(shape, minval, maxval))
+            leafs = self.build_leafs()
+            leafs_matrix = tf.concat(leafs, 0, name="Prob_Concat")
+            return tf.divide(
+                tf.matmul(features, leafs_matrix),
+                float(self.n_tree), name=name
+            )
 
-    def build_tree_projection(self, dtype, net):
+    def build_tree_projection(self, dtype, net, pruner, reuse_pruner):
         with tf.name_scope("Tree_Projection"):
-            flat_decisions = []
+            flat_probabilities = []
+            fc_shape = net.shape[1].value
             for i in range(self.n_tree):
-                local_net = net
                 with tf.name_scope("Decisions"):
-                    decisions = tf.nn.sigmoid(fully_connected_linear(
-                        local_net, [local_net.get_shape().as_list()[1], self.n_leaf],
-                        "_tree_mapping{}_{}".format(i, dtype), bias=True, reuse=self.reuse
+                    cursor = i if reuse_pruner else None
+                    p_left = tf.nn.sigmoid(fully_connected_linear(
+                        net=net,
+                        shape=[fc_shape, self.n_internals],
+                        appendix="_tree_mapping{}_{}".format(i, dtype),
+                        pruner=pruner, cursor=cursor
                     ))
-                    decisions_comp = 1 - decisions
-                    decisions_pack = tf.stack([decisions, decisions_comp])
-                    flat_decisions.append(tf.reshape(decisions_pack, [-1]))
-            return flat_decisions
+                    p_right = 1 - p_left
+                    p_all = tf.concat([p_left, p_right], 1)
+                    flat_probabilities.append(tf.reshape(p_all, [-1]))
+            return flat_probabilities
 
-    def build_routes(self, flat_decisions, n_batch_placeholder):
+    def build_routes(self, flat_probabilities, n_batch_placeholder):
         with tf.name_scope("Routes"):
-            batch_0_indices = tf.reshape(tf.range(0, n_batch_placeholder * self.n_leaf, self.n_leaf), [-1, 1])
-            in_repeat, out_repeat = self.n_leaf // 2, 1
-            batch_complement_indices = tf.reshape(
-                [[0] * in_repeat, [n_batch_placeholder * self.n_leaf] * in_repeat],
-                [-1, self.n_leaf]
+            n_flat_prob = 2 * self.n_internals
+            batch_indices = tf.reshape(
+                tf.range(0, n_flat_prob * n_batch_placeholder, n_flat_prob),
+                [-1, 1]
             )
+            n_repeat, n_local_internals = self.n_leaf // 2, 1
+            increment_mask = np.repeat([0, self.n_internals], n_repeat)
             routes = [
-                tf.gather(flat_decision, batch_0_indices + batch_complement_indices)
-                for flat_decision in flat_decisions
+                tf.gather(p_flat, batch_indices + increment_mask)
+                for p_flat in flat_probabilities
            ]
-            for d in range(1, self.tree_depth + 1):
-                indices = tf.range(2 ** d, 2 ** (d + 1)) - 1
-                tile_indices = tf.reshape(
-                    tf.tile(tf.expand_dims(indices, 1), [1, 2 ** (self.tree_depth - d + 1)]),
-                    [1, -1]
-                )
-                batch_indices = batch_0_indices + tile_indices
-
-                in_repeat //= 2
-                out_repeat *= 2
-
-                batch_complement_indices = tf.reshape(
-                    [[0] * in_repeat, [n_batch_placeholder * self.n_leaf] * in_repeat] * out_repeat,
-                    [-1, self.n_leaf]
-                )
-                for i, flat_decision in enumerate(flat_decisions):
-                    routes[i] *= tf.gather(flat_decision, batch_indices + batch_complement_indices)
+            for depth in range(1, self.tree_depth + 1):
+                n_repeat //= 2
+                n_local_internals *= 2
+                increment_mask = np.repeat(np.arange(
+                    n_local_internals - 1, 2 * n_local_internals - 1
+                ), 2)
+                increment_mask += np.tile([0, self.n_internals], n_local_internals)
+                increment_mask = np.repeat(increment_mask, n_repeat)
+                for i, p_flat in enumerate(flat_probabilities):
+                    routes[i] *= tf.gather(p_flat, batch_indices + increment_mask)
             return routes
 
-    def build_leafs(self, n_tree, n_leaf):
+    def build_leafs(self):
         with tf.name_scope("Leafs"):
-            local_leafs = [
-                tf.nn.softmax(
-                    self.init_prob_w([n_leaf, self.n_class], -2, 2),
-                    name="Leafs{}".format(i)
-                ) for i in range(n_tree)
-            ]
+            if self.n_class == 1:
+                local_leafs = [
+                    init_w([self.n_leaf, 1], "RegLeaf{}".format(i))
+                    for i in range(self.n_tree)
+                ]
+            else:
+                local_leafs = [
+                    tf.nn.softmax(w, name="ClfLeafs{}".format(i))
+                    for i, w in enumerate([
+                        init_w([self.n_leaf, self.n_class], "RawClfLeafs")
+                        for _ in range(self.n_tree)
+                    ])
+                ]
             return local_leafs
 
 
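Note: the rewritten build_routes stores each tree's decisions per sample as one
flat vector [p_left(0..n_internals-1) | p_right(0..n_internals-1)] and uses
increment_mask to pick, at every depth, the index of the decision probability
each leaf routes through; build_leafs' outputs are then concatenated so a single
matmul plus tf.divide replaces the old per-tree matmuls and reduce_mean. A
standalone NumPy sketch mirroring the diff's index arithmetic for one tree and
one sample (values are random; the only claim checked is that the leaf route
probabilities form a distribution):

    import numpy as np

    tree_depth = 2
    n_leaf = 2 ** (tree_depth + 1)   # 8 leaves
    n_internals = n_leaf - 1         # 7 internal nodes
    rng = np.random.RandomState(0)

    p_left = rng.uniform(size=n_internals)           # stand-in for the sigmoids
    p_flat = np.concatenate([p_left, 1.0 - p_left])  # [p_left | p_right] layout

    # Depth 0: the first half of the leaves goes left at the root (index 0),
    # the second half goes right (index 0 + n_internals).
    n_repeat, n_local_internals = n_leaf // 2, 1
    increment_mask = np.repeat([0, n_internals], n_repeat)
    routes = p_flat[increment_mask]

    for depth in range(1, tree_depth + 1):
        n_repeat //= 2
        n_local_internals *= 2
        # This depth's internal nodes, each used once as a left branch and
        # once as a right branch, fanned out over the leaves below it.
        increment_mask = np.repeat(np.arange(
            n_local_internals - 1, 2 * n_local_internals - 1
        ), 2)
        increment_mask += np.tile([0, n_internals], n_local_internals)
        increment_mask = np.repeat(increment_mask, n_repeat)
        routes = routes * p_flat[increment_mask]

    assert np.isclose(routes.sum(), 1.0)  # the 8 leaf probabilities sum to 1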