-
Notifications
You must be signed in to change notification settings - Fork 0
/
ssp_model.json_bpe
1 lines (1 loc) · 34.3 KB
/
ssp_model.json_bpe
1
{"merges": [], "vocab": {"How</w>": 0, "transferable</w>": 1, "are</w>": 2, "features</w>": 3, "in</w>": 4, "deep</w>": 5, "neural</w>": 6, "networks</w>": 7, "?</w>": 8, "Jason</w>": 9, "Yosinski</w>": 10, ",</w>": 11, "1</w>": 12, "Jeff</w>": 13, "Clune</w>": 14, "2</w>": 15, "Yoshua</w>": 16, "Bengio</w>": 17, "3</w>": 18, "and</w>": 19, "Hod</w>": 20, "Lipson4</w>": 21, "Dept</w>": 22, ".</w>": 23, "Computer</w>": 24, "Science</w>": 25, "Cornell</w>": 26, "University</w>": 27, "of</w>": 28, "Wyoming</w>": 29, "&</w>": 30, "Operations</w>": 31, "Research</w>": 32, "Montreal</w>": 33, "4</w>": 34, "Mechanical</w>": 35, "Aerospace</w>": 36, "Engineering</w>": 37, "Abstract</w>": 38, "Many</w>": 39, "trained</w>": 40, "on</w>": 41, "natural</w>": 42, "images</w>": 43, "exhibit</w>": 44, "a</w>": 45, "curious</w>": 46, "phenomenon</w>": 47, "common</w>": 48, ":</w>": 49, "the</w>": 50, "first</w>": 51, "layer</w>": 52, "they</w>": 53, "learn</w>": 54, "similar</w>": 55, "to</w>": 56, "Gabor</w>": 57, "filters</w>": 58, "color</w>": 59, "blobs</w>": 60, "Such</w>": 61, "-</w>": 62, "appear</w>": 63, "not</w>": 64, "be</w>": 65, "specific</w>": 66, "particular</w>": 67, "dataset</w>": 68, "or</w>": 69, "task</w>": 70, "but</w>": 71, "general</w>": 72, "that</w>": 73, "applicable</w>": 74, "many</w>": 75, "datasets</w>": 76, "tasks</w>": 77, "Features</w>": 78, "must</w>": 79, "eventually</w>": 80, "transition</w>": 81, "from</w>": 82, "by</w>": 83, "last</w>": 84, "network</w>": 85, "this</w>": 86, "has</w>": 87, "been</w>": 88, "studied</w>": 89, "extensively</w>": 90, "In</w>": 91, "paper</w>": 92, "we</w>": 93, "experimentally</w>": 94, "quantify</w>": 95, "generality</w>": 96, "versus</w>": 97, "specificity</w>": 98, "neurons</w>": 99, "each</w>": 100, "convolutional</w>": 101, "report</w>": 102, "few</w>": 103, "surprising</w>": 104, "results</w>": 105, "Transferability</w>": 106, "is</w>": 107, "negatively</w>": 108, "affected</w>": 109, "two</w>": 110, "distinct</w>": 111, "issues</w>": 112, "(</w>": 113, ")</w>": 114, "specialization</w>": 115, "higher</w>": 116, "their</w>": 117, "original</w>": 118, "at</w>": 119, "expense</w>": 120, "performance</w>": 121, "target</w>": 122, "which</w>": 123, "was</w>": 124, "expected</w>": 125, "optimization</w>": 126, "difficulties</w>": 127, "related</w>": 128, "splitting</w>": 129, "between</w>": 130, "co</w>": 131, "adapted</w>": 132, "an</w>": 133, "example</w>": 134, "ImageNet</w>": 135, "demonstrate</w>": 136, "either</w>": 137, "these</w>": 138, "may</w>": 139, "dominate</w>": 140, "depending</w>": 141, "whether</w>": 142, "transferred</w>": 143, "bottom</w>": 144, "middle</w>": 145, "top</w>": 146, "We</w>": 147, "also</w>": 148, "document</w>": 149, "transferability</w>": 150, "decreases</w>": 151, "as</w>": 152, "distance</w>": 153, "base</w>": 154, "increases</w>": 155, "transferring</w>": 156, "even</w>": 157, "distant</w>": 158, "can</w>": 159, "better</w>": 160, "than</w>": 161, "using</w>": 162, "random</w>": 163, "A</w>": 164, "final</w>": 165, "result</w>": 166, "initializing</w>": 167, "with</w>": 168, "almost</w>": 169, "any</w>": 170, "number</w>": 171, "layers</w>": 172, "produce</w>": 173, "boost</w>": 174, "generalization</w>": 175, "lingers</w>": 176, "after</w>": 177, "fine</w>": 178, "tuning</w>": 179, "Introduction</w>": 180, "Modern</w>": 181, "when</w>": 182, "all</w>": 183, "tend</w>": 184, "resemble</w>": 185, "The</w>": 186, "appearance</w>": 187, "so</w>": 188, "obtaining</w>": 189, "anything</w>": 190, "else</w>": 191, "image</w>": 192, "causes</w>": 193, "suspicion</w>": 194, "poorly</w>": 195, "chosen</w>": 196, "hyperparameters</w>": 197, "software</w>": 198, "bug</w>": 199, "This</w>": 200, "occurs</w>": 201, "only</w>": 202, "for</w>": 203, "different</w>": 204, "very</w>": 205, "training</w>": 206, "objectives</w>": 207, "including</w>": 208, "supervised</w>": 209, "classification</w>": 210, "Krizhevsky</w>": 211, "et</w>": 212, "al</w>": 213, "2012</w>": 214, "unsupervised</w>": 215, "density</w>": 216, "learning</w>": 217, "Lee</w>": 218, "2009</w>": 219, "sparse</w>": 220, "representations</w>": 221, "Le</w>": 222, "2011</w>": 223, "Because</w>": 224, "finding</w>": 225, "standard</w>": 226, "seems</w>": 227, "occur</w>": 228, "regardless</w>": 229, "exact</w>": 230, "cost</w>": 231, "function</w>": 232, "call</w>": 233, "On</w>": 234, "other</w>": 235, "hand</w>": 236, "know</w>": 237, "computed</w>": 238, "depend</w>": 239, "greatly</w>": 240, "For</w>": 241, "N</w>": 242, "dimensional</w>": 243, "softmax</w>": 244, "output</w>": 245, "successfully</w>": 246, "toward</w>": 247, "objective</w>": 248, "unit</w>": 249, "will</w>": 250, "class</w>": 251, "thus</w>": 252, "These</w>": 253, "intuitive</w>": 254, "notions</w>": 255, "provide</w>": 256, "more</w>": 257, "rigorous</w>": 258, "definitions</w>": 259, "below</w>": 260, "If</w>": 261, "arXiv</w>": 262, "1411</w>": 263, "1792v1</w>": 264, "[</w>": 265, "cs</w>": 266, "LG</w>": 267, "]</w>": 268, "6</w>": 269, "Nov</w>": 270, "2014</w>": 271, "then</w>": 272, "there</w>": 273, "somewhere</w>": 274, "observation</w>": 275, "raises</w>": 276, "questions</w>": 277, "\u2022</w>": 278, "Can</w>": 279, "degree</w>": 280, "Does</w>": 281, "suddenly</w>": 282, "single</w>": 283, "it</w>": 284, "spread</w>": 285, "out</w>": 286, "over</w>": 287, "several</w>": 288, "Where</w>": 289, "does</w>": 290, "take</w>": 291, "place</w>": 292, "near</w>": 293, "interested</w>": 294, "answers</w>": 295, "because</w>": 296, "extent</w>": 297, "within</w>": 298, "able</w>": 299, "use</w>": 300, "them</w>": 301, "transfer</w>": 302, "Caruana</w>": 303, "1995</w>": 304, ";</w>": 305, "train</w>": 306, "repurpose</w>": 307, "learned</w>": 308, "second</w>": 309, "process</w>": 310, "work</w>": 311, "if</w>": 312, "meaning</w>": 313, "suitable</w>": 314, "both</w>": 315, "instead</w>": 316, "When</w>": 317, "significantly</w>": 318, "smaller</w>": 319, "powerful</w>": 320, "tool</w>": 321, "enable</w>": 322, "large</w>": 323, "without</w>": 324, "overfitting</w>": 325, "Recent</w>": 326, "studies</w>": 327, "have</w>": 328, "taken</w>": 329, "advantage</w>": 330, "fact</w>": 331, "obtain</w>": 332, "state</w>": 333, "art</w>": 334, "Donahue</w>": 335, "2013a</w>": 336, "Zeiler</w>": 337, "Fergus</w>": 338, "2013</w>": 339, "Sermanet</w>": 340, "collectively</w>": 341, "suggesting</w>": 342, "do</w>": 343, "indeed</w>": 344, "compute</w>": 345, "fairly</w>": 346, "further</w>": 347, "emphasize</w>": 348, "importance</w>": 349, "studying</w>": 350, "nature</w>": 351, "usual</w>": 352, "approach</w>": 353, "copy</w>": 354, "its</w>": 355, "n</w>": 356, "remaining</w>": 357, "randomly</w>": 358, "initialized</w>": 359, "One</w>": 360, "choose</w>": 361, "backpropagate</w>": 362, "errors</w>": 363, "new</w>": 364, "into</w>": 365, "copied</w>": 366, "tune</w>": 367, "feature</w>": 368, "left</w>": 369, "frozen</w>": 370, "change</w>": 371, "during</w>": 372, "choice</w>": 373, "depends</w>": 374, "size</w>": 375, "parameters</w>": 376, "small</w>": 377, "often</w>": 378, "problem</w>": 379, "tuned</w>": 380, "improve</w>": 381, "Of</w>": 382, "course</w>": 383, "would</w>": 384, "little</w>": 385, "need</w>": 386, "lower</w>": 387, "level</w>": 388, "could</w>": 389, "just</w>": 390, "scratch</w>": 391, "compare</w>": 392, "techniques</w>": 393, "\u2014</w>": 394, "following</w>": 395, "sections</w>": 396, "make</w>": 397, "contributions</w>": 398, "define</w>": 399, "way</w>": 400, "namely</w>": 401, "how</w>": 402, "well</w>": 403, "one</w>": 404, "another</w>": 405, "Section</w>": 406, "pairs</w>": 407, "characterize</w>": 408, "yields</w>": 409, "four</w>": 410, "show</w>": 411, "separate</w>": 412, "cause</w>": 413, "degradation</w>": 414, "i</w>": 415, "themselves</w>": 416, "ii</w>": 417, "due</w>": 418, "neighboring</w>": 419, "effects</w>": 420, "benefits</w>": 421, "dissimilar</w>": 422, "relatively</w>": 423, "find</w>": 424, "previously</w>": 425, "reported</w>": 426, "Jarrett</w>": 427, "weights</w>": 428, "vs</w>": 429, "perform</w>": 430, "5</w>": 431, "Finally</w>": 432, "particularly</w>": 433, "effect</w>": 434, "having</w>": 435, "seen</w>": 436, "persists</w>": 437, "extensive</w>": 438, "Generality</w>": 439, "Specificity</w>": 440, "Measured</w>": 441, "Transfer</w>": 442, "Performance</w>": 443, "noted</w>": 444, "tendency</w>": 445, "up</w>": 446, "study</w>": 447, "set</w>": 448, "used</w>": 449, "B</w>": 450, "It</w>": 451, "important</w>": 452, "note</w>": 453, "definition</w>": 454, "similarity</w>": 455, "create</w>": 456, "constructing</w>": 457, "non</w>": 458, "overlapping</w>": 459, "subsets</w>": 460, "To</w>": 461, "split</w>": 462, "1000</w>": 463, "classes</w>": 464, "groups</w>": 465, "containing</w>": 466, "500</w>": 467, "approximately</w>": 468, "half</w>": 469, "data</w>": 470, "about</w>": 471, "645</w>": 472, "000</w>": 473, "examples</w>": 474, "eight</w>": 475, "baseA</w>": 476, "baseB</w>": 477, "shown</w>": 478, "rows</w>": 479, "Figure</w>": 480, "{</w>": 481, "7</w>": 482, "}</w>": 483, "explanation</w>": 484, "=</w>": 485, "First</w>": 486, "selffer</w>": 487, "B3B</w>": 488, "five</w>": 489, "\u2013</w>": 490, "8</w>": 491, "control</w>": 492, "next</w>": 493, "row</w>": 494, "A3B</w>": 495, "Intuitively</w>": 496, "here</w>": 497, "classify</w>": 498, "performs</w>": 499, "evidence</w>": 500, "third</w>": 501, "least</w>": 502, "respect</w>": 503, "suffers</w>": 504, "repeated</w>": 505, "directions</w>": 506, "e</w>": 507, "AnB</w>": 508, "BnA</w>": 509, "above</w>": 510, "versions</w>": 511, "where</w>": 512, "+</w>": 513, "like</w>": 514, "assign</w>": 515, "contains</w>": 516, "clusters</w>": 517, "dogs</w>": 518, "cats</w>": 519, "13</w>": 520, "biological</w>": 521, "family</w>": 522, "Felidae</w>": 523, "tabby</w>": 524, "cat</w>": 525, "tiger</w>": 526, "Persian</w>": 527, "Siamese</w>": 528, "Egyptian</w>": 529, "mountain</w>": 530, "lion</w>": 531, "lynx</w>": 532, "leopard</w>": 533, "snow</w>": 534, "jaguar</w>": 535, "cheetah</w>": 536, "average</w>": 537, "contain</w>": 538, "felid</w>": 539, "levels</w>": 540, "help</w>": 541, "some</w>": 542, "types</w>": 543, "felids</w>": 544, "generalizing</w>": 545, "expect</w>": 546, "high</w>": 547, "detectors</w>": 548, "old</w>": 549, "low</w>": 550, "Thus</w>": 551, "created</w>": 552, "assigning</w>": 553, "less</w>": 554, "Fortunately</w>": 555, "provided</w>": 556, "hierarchy</w>": 557, "parent</w>": 558, "information</w>": 559, "allowed</w>": 560, "us</w>": 561, "special</w>": 562, "halves</w>": 563, "semantically</w>": 564, "possible</w>": 565, "man</w>": 566, "made</w>": 567, "entities</w>": 568, "quite</w>": 569, "551</w>": 570, "group</w>": 571, "449</w>": 572, "Further</w>": 573, "details</w>": 574, "given</w>": 575, "supplementary</w>": 576, "material</w>": 577, "1The</w>": 578, "released</w>": 579, "Large</w>": 580, "Scale</w>": 581, "Visual</w>": 582, "Recognition</w>": 583, "Challenge</w>": 584, "ILSVRC2012</w>": 585, "Deng</w>": 586, "281</w>": 587, "167</w>": 588, "labeled</w>": 589, "50</w>": 590, "test</w>": 591, "2Note</w>": 592, "doesn</w>": 593, "\u2019</w>": 594, "t</w>": 595, "sense</w>": 596, "case</w>": 597, "B8B</w>": 598, "A8B</w>": 599, "never</w>": 600, "input</w>": 601, "labels</w>": 602, "WA1</w>": 603, "WA2</w>": 604, "WA3</w>": 605, "WA4</w>": 606, "WA5</w>": 607, "WA6</w>": 608, "WA7</w>": 609, "WA8</w>": 610, "WB1</w>": 611, "WB2</w>": 612, "WB3</w>": 613, "WB4</w>": 614, "WB5</w>": 615, "WB6</w>": 616, "WB7</w>": 617, "WB8</w>": 618, "Overview</w>": 619, "experimental</w>": 620, "treatments</w>": 621, "controls</w>": 622, "Top</w>": 623, "backprop</w>": 624, "rectangles</w>": 625, "g</w>": 626, "represent</w>": 627, "weight</w>": 628, "vector</w>": 629, "indicating</w>": 630, "originally</w>": 631, "vertical</w>": 632, "ellipsoidal</w>": 633, "bars</w>": 634, "vectors</w>": 635, "activations</w>": 636, "Third</w>": 637, "upper</w>": 638, "\u2212</w>": 639, "entire</w>": 640, "same</w>": 641, "locked</w>": 642, "\u201c</w>": 643, "\u201d</w>": 644, "treatment</w>": 645, "reveals</w>": 646, "occurrence</w>": 647, "fragile</w>": 648, "coadaptation</w>": 649, "adapt</w>": 650, "such</w>": 651, "cannot</w>": 652, "rediscovered</w>": 653, "Fourth</w>": 654, "except</w>": 655, "tests</w>": 656, "Experimental</w>": 657, "Setup</w>": 658, "Since</w>": 659, "won</w>": 660, "competition</w>": 661, "much</w>": 662, "interest</w>": 663, "tweaking</w>": 664, "models</w>": 665, "However</w>": 666, "aim</w>": 667, "maximize</w>": 668, "absolute</w>": 669, "rather</w>": 670, "known</w>": 671, "architecture</w>": 672, "reference</w>": 673, "implementation</w>": 674, "Caffe</w>": 675, "Jia</w>": 676, "our</w>": 677, "comparable</w>": 678, "extensible</w>": 679, "useful</w>": 680, "researchers</w>": 681, "setup</w>": 682, "rates</w>": 683, "etc</w>": 684, "code</w>": 685, "parameter</w>": 686, "files</w>": 687, "reproduce</w>": 688, "experiments</w>": 689, "available</w>": 690, "http</w>": 691, "/</w>": 692, "yosinski</w>": 693, "com</w>": 694, "Results</w>": 695, "Discussion</w>": 696, "performed</w>": 697, "three</w>": 698, "sets</w>": 699, "main</w>": 700, "experiment</w>": 701, "splits</w>": 702, "discussed</w>": 703, "presents</w>": 704, "describes</w>": 705, "0</w>": 706, "52</w>": 707, "54</w>": 708, "56</w>": 709, "58</w>": 710, "60</w>": 711, "62</w>": 712, "64</w>": 713, "66</w>": 714, "accuracy</w>": 715, "BnB</w>": 716, "Layer</w>": 717, "chopped</w>": 718, "retrained</w>": 719, "improves</w>": 720, "Fine</w>": 721, "recovers</w>": 722, "interactions</w>": 723, "drops</w>": 724, "adaptation</w>": 725, "representation</w>": 726, "s</w>": 727, "Each</w>": 728, "marker</w>": 729, "figure</w>": 730, "represents</w>": 731, "validation</w>": 732, "white</w>": 733, "circles</w>": 734, "There</w>": 735, "points</w>": 736, "tested</w>": 737, "dark</w>": 738, "blue</w>": 739, "dot</w>": 740, "Light</w>": 741, "Dark</w>": 742, "red</w>": 743, "diamonds</w>": 744, "light</w>": 745, "Points</w>": 746, "shifted</w>": 747, "slightly</w>": 748, "right</w>": 749, "visual</w>": 750, "clarity</w>": 751, "Bottom</w>": 752, "Lines</w>": 753, "connecting</w>": 754, "means</w>": 755, "Numbered</w>": 756, "descriptions</w>": 757, "line</w>": 758, "refer</w>": 759, "interpretation</w>": 760, "applies</w>": 761, "Similar</w>": 762, "Datasets</w>": 763, "Random</w>": 764, "shown3</w>": 765, "yield</w>": 766, "conclusions</w>": 767, "interpretations</w>": 768, "dotted</w>": 769, "3AnA</w>": 770, "statistically</w>": 771, "equivalent</w>": 772, "cases</w>": 773, "simplify</w>": 774, "notation</w>": 775, "label</w>": 776, "Similarly</w>": 777, "aggregated</w>": 778, "identical</w>": 779, "subset</w>": 780, "attains</w>": 781, "625</w>": 782, "37</w>": 783, "%</w>": 784, "error</w>": 785, "42</w>": 786, "attained</w>": 787, "While</w>": 788, "might</w>": 789, "lead</w>": 790, "net</w>": 791, "ways</w>": 792, "mistakes</w>": 793, "behavior</w>": 794, "As</w>": 795, "That</w>": 796, "save</w>": 797, "reinitialize</w>": 798, "whole</w>": 799, "retrain</w>": 800, "holds</w>": 801, "true</w>": 802, "worse</w>": 803, "drop</w>": 804, "contained</w>": 805, "successive</w>": 806, "interact</w>": 807, "complex</w>": 808, "relearned</w>": 809, "alone</w>": 810, "Gradient</w>": 811, "descent</w>": 812, "good</w>": 813, "solution</w>": 814, "time</w>": 815, "were</w>": 816, "jointly</w>": 817, "By</w>": 818, "nearly</w>": 819, "back</w>": 820, "get</w>": 821, "closer</w>": 822, "relearn</w>": 823, "apparently</w>": 824, "relearning</w>": 825, "simple</w>": 826, "enough</w>": 827, "gradient</w>": 828, "Alternately</w>": 829, "say</w>": 830, "previous</w>": 831, "knowledge</w>": 832, "observed</w>": 833, "literature</w>": 834, "prevents</w>": 835, "measure</w>": 836, "Layers</w>": 837, "perfectly</w>": 838, "giving</w>": 839, "blob</w>": 840, "shows</w>": 841, "slight</w>": 842, "significant</w>": 843, "Thanks</w>": 844, "tell</w>": 845, "combination</w>": 846, "lost</w>": 847, "dominates</w>": 848, "whereas</w>": 849, "diminishes</w>": 850, "Although</w>": 851, "successful</w>": 852, "elsewhere</w>": 853, "Girshick</w>": 854, "2013b</w>": 855, "limited</w>": 856, "noticing</w>": 857, "alternative</w>": 858, "strictly</w>": 859, "believe</w>": 860, "carefully</w>": 861, "quantified</w>": 862, "decoupled</w>": 863, "showing</w>": 864, "part</w>": 865, "regime</w>": 866, "generalize</w>": 867, "those</w>": 868, "directly</w>": 869, "Previously</w>": 870, "reason</w>": 871, "want</w>": 872, "suggests</w>": 873, "Note</w>": 874, "should</w>": 875, "attributed</w>": 876, "longer</w>": 877, "total</w>": 878, "450k</w>": 879, "iterations</w>": 880, "finetuned</w>": 881, "length</w>": 882, "improvement</w>": 883, "plausible</w>": 884, "beginning</w>": 885, "completely</w>": 886, "still</w>": 887, "linger</w>": 888, "boosting</w>": 889, "through</w>": 890, "retraining</w>": 891, "keep</w>": 892, "initialize</w>": 893, "keeping</w>": 894, "anywhere</w>": 895, "seven</w>": 896, "produces</w>": 897, "improved</w>": 898, "across</w>": 899, "Table</w>": 900, "4We</w>": 901, "aggregate</w>": 902, "point</w>": 903, "computationally</w>": 904, "expensive</w>": 905, "9</w>": 906, "days</w>": 907, "GPU</w>": 908, "publication</w>": 909, "per</w>": 910, "aggregation</w>": 911, "averaged</w>": 912, "ranges</w>": 913, "mean</w>": 914, "Dissimilar</w>": 915, "Splitting</w>": 916, "Man</w>": 917, "Natural</w>": 918, "Classes</w>": 919, "Into</w>": 920, "Separate</w>": 921, "mentioned</w>": 922, "effectiveness</w>": 923, "decline</w>": 924, "become</w>": 925, "hypothesis</w>": 926, "comparing</w>": 927, "object</w>": 928, "creates</w>": 929, "subplot</w>": 930, "orange</w>": 931, "hexagons</w>": 932, "join</w>": 933, "lines</w>": 934, "categories</w>": 935, "simply</w>": 936, "being</w>": 937, "easier</w>": 938, "Weights</w>": 939, "untrained</w>": 940, "showed</w>": 941, "strikingly</w>": 942, "rectification</w>": 943, "pooling</w>": 944, "local</w>": 945, "normalization</w>": 946, "They</w>": 947, "Caltech</w>": 948, "101</w>": 949, "Fei</w>": 950, "2004</w>": 951, "ask</w>": 952, "optimal</w>": 953, "carries</w>": 954, "deeper</w>": 955, "larger</w>": 956, "obtained</w>": 957, "various</w>": 958, "choices</w>": 959, "falls</w>": 960, "off</w>": 961, "quickly</w>": 962, "chance</w>": 963, "getting</w>": 964, "straightforward</w>": 965, "comparison</w>": 966, "Whereas</w>": 967, "max</w>": 968, "did</w>": 969, "nonlinearity</w>": 970, "relu</w>": 971, "x</w>": 972, "abs</w>": 973, "tanh</w>": 974, "sizes</w>": 975, "differences</w>": 976, "Additionally</w>": 977, "considered</w>": 978, "hyperparameter</w>": 979, "architectural</w>": 980, "datapoint</w>": 981, "tweak</w>": 982, "initialization</w>": 983, "subtracting</w>": 984, "individual</w>": 985, "normalized</w>": 986, "performances</w>": 987, "plotted</w>": 988, "makes</w>": 989, "things</w>": 990, "apparent</w>": 991, "gap</w>": 992, "grows</w>": 993, "25</w>": 994, "Second</w>": 995, "latter</w>": 996, "differ</w>": 997, "fully</w>": 998, "ours</w>": 999, "informative</w>": 1000, "however</w>": 1001, "based</w>": 1002, "draws</w>": 1003, "multiple</w>": 1004, "runs</w>": 1005, "loss</w>": 1006, "failed</w>": 1007, "converge</w>": 1008, "producing</w>": 1009, "Much</w>": 1010, "convergence</w>": 1011, "30</w>": 1012, "20</w>": 1013, "15</w>": 1014, "10</w>": 1015, "05</w>": 1016, "00</w>": 1017, "Relative</w>": 1018, "m</w>": 1019, "Degradation</w>": 1020, "vice</w>": 1021, "versa</w>": 1022, "connects</w>": 1023, "consist</w>": 1024, "plots</w>": 1025, "compared</w>": 1026, "making</w>": 1027, "extra</w>": 1028, "overfit</w>": 1029, "Conclusions</w>": 1030, "demonstrated</w>": 1031, "method</w>": 1032, "quantifying</w>": 1033, "fragilely</w>": 1034, "found</w>": 1035, "substantial</w>": 1036, "generally</w>": 1037, "technique</w>": 1038, "improving</w>": 1039, "Prototypical</w>": 1040, "Networks</w>": 1041, "Few</w>": 1042, "shot</w>": 1043, "Learning</w>": 1044, "Jake</w>": 1045, "Snell</w>": 1046, "Toronto</w>": 1047, "\u2217</w>": 1048, "Kevin</w>": 1049, "Swersky</w>": 1050, "Twitter</w>": 1051, "Richard</w>": 1052, "S</w>": 1053, "Zemel</w>": 1054, "Vector</w>": 1055, "Institute</w>": 1056, "propose</w>": 1057, "prototypical</w>": 1058, "classifier</w>": 1059, "metric</w>": 1060, "space</w>": 1061, "computing</w>": 1062, "distances</w>": 1063, "prototype</w>": 1064, "Compared</w>": 1065, "recent</w>": 1066, "approaches</w>": 1067, "reflect</w>": 1068, "simpler</w>": 1069, "inductive</w>": 1070, "bias</w>": 1071, "beneficial</w>": 1072, "achieve</w>": 1073, "excellent</w>": 1074, "analysis</w>": 1075, "design</w>": 1076, "decisions</w>": 1077, "improvements</w>": 1078, "involving</w>": 1079, "complicated</w>": 1080, "meta</w>": 1081, "extend</w>": 1082, "zero</w>": 1083, "theart</w>": 1084, "CU</w>": 1085, "Birds</w>": 1086, "16</w>": 1087, "accommodate</w>": 1088, "naive</w>": 1089, "re</w>": 1090, "model</w>": 1091, "severely</w>": 1092, "difficult</w>": 1093, "humans</w>": 1094, "ability</w>": 1095, "Two</w>": 1096, "progress</w>": 1097, "Vinyals</w>": 1098, "29</w>": 1099, "proposed</w>": 1100, "matching</w>": 1101, "uses</w>": 1102, "attention</w>": 1103, "mechanism</w>": 1104, "embedding</w>": 1105, "support</w>": 1106, "predict</w>": 1107, "unlabeled</w>": 1108, "query</w>": 1109, "Matching</w>": 1110, "interpreted</w>": 1111, "weighted</w>": 1112, "nearest</w>": 1113, "neighbor</w>": 1114, "applied</w>": 1115, "Notably</w>": 1116, "utilizes</w>": 1117, "sampled</w>": 1118, "mini</w>": 1119, "batches</w>": 1120, "called</w>": 1121, "episodes</w>": 1122, "episode</w>": 1123, "designed</w>": 1124, "mimic</w>": 1125, "subsampling</w>": 1126, "faithful</w>": 1127, "environment</w>": 1128, "thereby</w>": 1129, "Ravi</w>": 1130, "Larochelle</w>": 1131, "22</w>": 1132, "episodic</w>": 1133, "idea</w>": 1134, "Their</w>": 1135, "involves</w>": 1136, "LSTM</w>": 1137, "updates</w>": 1138, "Here</w>": 1139, "learner</w>": 1140, "learns</w>": 1141, "custom</w>": 1142, "attack</w>": 1143, "addressing</w>": 1144, "key</w>": 1145, "issue</w>": 1146, "under</w>": 1147, "assumption</w>": 1148, "Our</w>": 1149, "exists</w>": 1150, "cluster</w>": 1151, "around</w>": 1152, "order</w>": 1153, "linear</w>": 1154, "mapping</w>": 1155, "Classification</w>": 1156, "embedded</w>": 1157, "follow</w>": 1158, "tackle</w>": 1159, "comes</w>": 1160, "description</w>": 1161, "therefore</w>": 1162, "shared</w>": 1163, "serve</w>": 1164, "*</w>": 1165, "Initial</w>": 1166, "author</w>": 1167, "done</w>": 1168, "while</w>": 1169, "1703</w>": 1170, "05175v2</w>": 1171, "19</w>": 1172, "Jun</w>": 1173, "2017</w>": 1174, "c1</w>": 1175, "c2</w>": 1176, "c3</w>": 1177, "v1</w>": 1178, "v2</w>": 1179, "v3</w>": 1180, "b</w>": 1181, "Zero</w>": 1182, "scenarios</w>": 1183, "Left</w>": 1184, "prototypes</w>": 1185, "ck</w>": 1186, "Right</w>": 1187, "produced</w>": 1188, "vk</w>": 1189, "classified</w>": 1190, "via</w>": 1191, "p\u03c6</w>": 1192, "y</w>": 1193, "k</w>": 1194, "|</w>": 1195, "\u221d</w>": 1196, "exp</w>": 1197, "d</w>": 1198, "f\u03c6</w>": 1199, "scenario</w>": 1200, "formulate</w>": 1201, "settings</w>": 1202, "draw</w>": 1203, "connections</w>": 1204, "setting</w>": 1205, "analyze</w>": 1206, "underlying</w>": 1207, "relate</w>": 1208, "clustering</w>": 1209, "justify</w>": 1210, "Bregman</w>": 1211, "divergence</w>": 1212, "squared</w>": 1213, "Euclidean</w>": 1214, "empirically</w>": 1215, "vital</w>": 1216, "outperforms</w>": 1217, "commonly</w>": 1218, "cosine</w>": 1219, "benchmark</w>": 1220, "efficient</w>": 1221, "algorithms</w>": 1222, "appealing</w>": 1223, "Notation</w>": 1224, "x1</w>": 1225, "y1</w>": 1226, "xN</w>": 1227, "yN</w>": 1228, "xi</w>": 1229, "\u2208</w>": 1230, "R</w>": 1231, "D</w>": 1232, "yi</w>": 1233, "K</w>": 1234, "corresponding</w>": 1235, "Sk</w>": 1236, "denotes</w>": 1237, "Model</w>": 1238, "M</w>": 1239, "RM</w>": 1240, "\u2192</w>": 1241, "learnable</w>": 1242, "\u03c6</w>": 1243, "belonging</w>": 1244, "X</w>": 1245, "Given</w>": 1246, "\u00d7</w>": 1247, "\u221e</w>": 1248, "distribution</w>": 1249, "P</w>": 1250, "k0</w>": 1251, "ck0</w>": 1252, "proceeds</w>": 1253, "minimizing</w>": 1254, "negative</w>": 1255, "log</w>": 1256, "probability</w>": 1257, "J</w>": 1258, "SGD</w>": 1259, "Training</w>": 1260, "formed</w>": 1261, "selecting</w>": 1262, "choosing</w>": 1263, "act</w>": 1264, "remainder</w>": 1265, "Pseudocode</w>": 1266, "Algorithm</w>": 1267, "computation</w>": 1268, "NC</w>": 1269, "\u2264</w>": 1270, "NS</w>": 1271, "NQ</w>": 1272, "RANDOMSAMPLE</w>": 1273, "elements</w>": 1274, "uniformly</w>": 1275, "replacement</w>": 1276, "Input</w>": 1277, "Dk</w>": 1278, "Output</w>": 1279, "generated</w>": 1280, "V</w>": 1281, "\u2190</w>": 1282, "Select</w>": 1283, "indices</w>": 1284, "DVk</w>": 1285, "Qk</w>": 1286, "\\</w>": 1287, "Compute</w>": 1288, "end</w>": 1289, "Initialize</w>": 1290, "\"</w>": 1291, "logX</w>": 1292, "#</w>": 1293, "Update</w>": 1294, "Mixture</w>": 1295, "Density</w>": 1296, "Estimation</w>": 1297, "functions</w>": 1298, "regular</w>": 1299, "divergences</w>": 1300, "algorithm</w>": 1301, "performing</w>": 1302, "mixture</w>": 1303, "estimation</w>": 1304, "exponential</w>": 1305, "d\u03d5</w>": 1306, "defined</w>": 1307, "\u03d5</w>": 1308, "differentiable</w>": 1309, "convex</w>": 1310, "Legendre</w>": 1311, "type</w>": 1312, "Examples</w>": 1313, "include</w>": 1314, "kz</w>": 1315, "z</w>": 1316, "0k</w>": 1317, "Mahalanobis</w>": 1318, "Prototype</w>": 1319, "viewed</w>": 1320, "terms</w>": 1321, "hard</w>": 1322, "assigned</w>": 1323, "representative</w>": 1324, "achieving</w>": 1325, "minimal</w>": 1326, "Equation</w>": 1327, "representatives</w>": 1328, "Moreover</w>": 1329, "p\u03c8</w>": 1330, "\u03b8</w>": 1331, "cumulant</w>": 1332, "\u03c8</w>": 1333, "written</w>": 1334, "uniquely</w>": 1335, "determined</w>": 1336, "equally</w>": 1337, "assignment</w>": 1338, "inference</w>": 1339, "prediction</w>": 1340, "\u00b5</w>": 1341, "\u03b8k</w>": 1342, "effectively</w>": 1343, "specifies</w>": 1344, "modeling</w>": 1345, "assumptions</w>": 1346, "classconditional</w>": 1347, "Reinterpretation</w>": 1348, "Linear</w>": 1349, "gaining</w>": 1350, "insight</w>": 1351, "parameterization</w>": 1352, "term</w>": 1353, "constant</w>": 1354, "affect</w>": 1355, "probabilities</w>": 1356, "focus</w>": 1357, "primarily</w>": 1358, "spherical</w>": 1359, "Gaussian</w>": 1360, "densities</w>": 1361, "indicate</w>": 1362, "effective</w>": 1363, "despite</w>": 1364, "equivalence</w>": 1365, "hypothesize</w>": 1366, "required</w>": 1367, "linearity</w>": 1368, "Indeed</w>": 1369, "modern</w>": 1370, "systems</w>": 1371, "currently</w>": 1372, "14</w>": 1373, "28</w>": 1374, "Comparison</w>": 1375, "xk</w>": 1376, "since</w>": 1377, "question</w>": 1378, "fixed</w>": 1379, "greater</w>": 1380, "require</w>": 1381, "partitioning</w>": 1382, "scheme</w>": 1383, "Mensink</w>": 1384, "Rippel</w>": 1385, "methods</w>": 1386, "phase</w>": 1387, "ordinary</w>": 1388, "extensions</w>": 1389, "decoupling</w>": 1390, "conditional</w>": 1391, "FCE</w>": 1392, "takes</w>": 1393, "account</w>": 1394, "likewise</w>": 1395, "incorporated</w>": 1396, "increase</w>": 1397, "imposes</w>": 1398, "arbitrary</w>": 1399, "ordering</w>": 1400, "bi</w>": 1401, "directional</w>": 1402, "Instead</w>": 1403, "outline</w>": 1404, "Design</w>": 1405, "Choices</w>": 1406, "Distance</w>": 1407, "apply</w>": 1408, "permissible</w>": 1409, "conjecture</w>": 1410, "hold</w>": 1411, "Episode</w>": 1412, "composition</w>": 1413, "construct</w>": 1414, "Nc</w>": 1415, "match</w>": 1416, "situation</w>": 1417, "comprised</w>": 1418, "extremely</w>": 1419, "held</w>": 1420, "Another</w>": 1421, "consideration</w>": 1422, "usually</w>": 1423, "best</w>": 1424, "Shot</w>": 1425, "differs</w>": 1426, "accuracies</w>": 1427, "Omniglot</w>": 1428, "Acc</w>": 1429, "Dist</w>": 1430, "Tune</w>": 1431, "MATCHING</w>": 1432, "NETWORKS</w>": 1433, "Cosine</w>": 1434, "98</w>": 1435, "93</w>": 1436, "Y</w>": 1437, "97</w>": 1438, "NEURAL</w>": 1439, "STATISTICIAN</w>": 1440, "99</w>": 1441, "PROTOTYPICAL</w>": 1442, "OURS</w>": 1443, "Euclid</w>": 1444, "96</w>": 1445, "advance</w>": 1446, "raw</w>": 1447, "text</w>": 1448, "Modifying</w>": 1449, "deal</w>": 1450, "g\u03d1</w>": 1451, "An</w>": 1452, "illustration</w>": 1453, "procedure</w>": 1454, "relates</w>": 1455, "come</w>": 1456, "domains</w>": 1457, "helpful</w>": 1458, "fix</w>": 1459, "constrain</w>": 1460, "f</w>": 1461, "Experiments</w>": 1462, "miniImageNet</w>": 1463, "version</w>": 1464, "ILSVRC</w>": 1465, "26</w>": 1466, "UCSD</w>": 1467, "bird</w>": 1468, "CUB</w>": 1469, "200</w>": 1470, "31</w>": 1471, "1623</w>": 1472, "handwritten</w>": 1473, "characters</w>": 1474, "collected</w>": 1475, "alphabets</w>": 1476, "associated</w>": 1477, "character</w>": 1478, "drawn</w>": 1479, "human</w>": 1480, "subject</w>": 1481, "resizing</w>": 1482, "grayscale</w>": 1483, "augmenting</w>": 1484, "rotations</w>": 1485, "multiples</w>": 1486, "90</w>": 1487, "degrees</w>": 1488, "1200</w>": 1489, "plus</w>": 1490, "800</w>": 1491, "mirrors</w>": 1492, "composed</w>": 1493, "blocks</w>": 1494, "block</w>": 1495, "comprises</w>": 1496, "filter</w>": 1497, "convolution</w>": 1498, "batch</w>": 1499, "ReLU</w>": 1500, "encoder</w>": 1501, "All</w>": 1502, "Adam</w>": 1503, "11</w>": 1504, "initial</w>": 1505, "rate</w>": 1506, "cut</w>": 1507, "every</w>": 1508, "2000</w>": 1509, "No</w>": 1510, "regularization</w>": 1511, "advantageous</w>": 1512, "fewer</w>": 1513, "against</w>": 1514, "baselines</w>": 1515, "statistician</w>": 1516, "derived</w>": 1517, "12</w>": 1518, "84</w>": 1519, "divided</w>": 1520, "100</w>": 1521, "600</w>": 1522, "introduced</w>": 1523, "monitoring</w>": 1524, "though</w>": 1525, "1600</w>": 1526, "increased</w>": 1527, "95</w>": 1528, "confidence</w>": 1529, "intervals</w>": 1530, "BASELINE</w>": 1531, "NEAREST</w>": 1532, "NEIGHBORS</w>": 1533, "86</w>": 1534, "\u00b1</w>": 1535, "49</w>": 1536, "79</w>": 1537, "43</w>": 1538, "40</w>": 1539, "78</w>": 1540, "51</w>": 1541, "09</w>": 1542, "71</w>": 1543, "55</w>": 1544, "73</w>": 1545, "META</w>": 1546, "LEARNER</w>": 1547, "44</w>": 1548, "77</w>": 1549, "68</w>": 1550, "70</w>": 1551, "80</w>": 1552, "Accuracy</w>": 1553, "Proto</w>": 1554, "Nets</w>": 1555, "axis</w>": 1556, "indicates</w>": 1557, "configuration</w>": 1558, "Error</w>": 1559, "schedule</w>": 1560, "until</w>": 1561, "stops</w>": 1562, "variants</w>": 1563, "Meta</w>": 1564, "Learner</w>": 1565, "achieves</w>": 1566, "wide</w>": 1567, "margin</w>": 1568, "conducted</w>": 1569, "determine</w>": 1570, "own</w>": 1571, "difficulty</w>": 1572, "helps</w>": 1573, "forces</w>": 1574, "grained</w>": 1575, "Also</w>": 1576, "substantially</w>": 1577, "pronounced</w>": 1578, "naturally</w>": 1579, "suited</w>": 1580, "assess</w>": 1581, "suitability</w>": 1582, "run</w>": 1583, "788</w>": 1584, "species</w>": 1585, "closely</w>": 1586, "Reed</w>": 1587, "23</w>": 1588, "preparing</w>": 1589, "Image</w>": 1590, "ALE</w>": 1591, "Fisher</w>": 1592, "SJE</w>": 1593, "AlexNet</w>": 1594, "SAMPLE</w>": 1595, "CLUSTERING</w>": 1596, "17</w>": 1597, "GoogLeNet</w>": 1598, "DS</w>": 1599, "DA</w>": 1600, "PROTO</w>": 1601, "NETS</w>": 1602, "divide</w>": 1603, "024</w>": 1604, "extracted</w>": 1605, "applying</w>": 1606, "crops</w>": 1607, "horizontally</w>": 1608, "flipped</w>": 1609, "image2</w>": 1610, "At</w>": 1611, "crop</w>": 1612, "312</w>": 1613, "continuous</w>": 1614, "attribute</w>": 1615, "attributes</w>": 1616, "encode</w>": 1617, "characteristics</w>": 1618, "shape</w>": 1619, "feather</w>": 1620, "patterns</w>": 1621, "1024</w>": 1622, "normalize</w>": 1623, "domain</w>": 1624, "constructed</w>": 1625, "embeddings</w>": 1626, "optimized</w>": 1627, "decay</w>": 1628, "Early</w>": 1629, "stopping</w>": 1630, "epochs</w>": 1631, "utilizing</w>": 1632, "trains</w>": 1633, "SVM</w>": 1634, "relative</w>": 1635, "Related</w>": 1636, "Work</w>": 1637, "vast</w>": 1638, "summarize</w>": 1639, "most</w>": 1640, "relevant</w>": 1641, "Neighborhood</w>": 1642, "Components</w>": 1643, "Analysis</w>": 1644, "NCA</w>": 1645, "KNN</w>": 1646, "leave</w>": 1647, "transformed</w>": 1648, "Salakhutdinov</w>": 1649, "Hinton</w>": 1650, "27</w>": 1651, "transformation</w>": 1652, "LMNN</w>": 1653, "attempts</w>": 1654, "optimize</w>": 1655, "hinge</w>": 1656, "encourages</w>": 1657, "neighborhood</w>": 1658, "DNet</w>": 1659, "21</w>": 1660, "upon</w>": 1661, "extension</w>": 1662, "opposed</w>": 1663, "distinction</w>": 1664, "form</w>": 1665, "allows</w>": 1666, "concise</w>": 1667, "independent</w>": 1668, "obviates</w>": 1669, "store</w>": 1670, "predictions</w>": 1671, "represented</w>": 1672, "developed</w>": 1673, "rapidly</w>": 1674, "incorporate</w>": 1675, "relies</w>": 1676, "handle</w>": 1677, "downloaded</w>": 1678, "https</w>": 1679, "github</w>": 1680, "reedscot</w>": 1681, "cvpr2016</w>": 1682, "novel</w>": 1683, "contrast</w>": 1684, "linearly</w>": 1685, "embed</w>": 1686, "couple</w>": 1687, "attempt</w>": 1688, "allowing</w>": 1689, "pre</w>": 1690, "processing</w>": 1691, "step</w>": 1692, "multi</w>": 1693, "modal</w>": 1694, "variant</w>": 1695, "manner</w>": 1696, "no</w>": 1697, "requires</w>": 1698, "addition</w>": 1699, "generalizes</w>": 1700, "dynamics</w>": 1701, "itself</w>": 1702, "goal</w>": 1703, "forms</w>": 1704, "classifiers</w>": 1705, "dynamically</w>": 1706, "core</w>": 1707, "rely</w>": 1708, "nets</w>": 1709, "secondary</w>": 1710, "amount</w>": 1711, "generative</w>": 1712, "extends</w>": 1713, "variational</w>": 1714, "autoencoder</w>": 1715, "24</w>": 1716, "component</w>": 1717, "statistic</w>": 1718, "summarizes</w>": 1719, "encoding</w>": 1720, "taking</w>": 1721, "sample</w>": 1722, "post</w>": 1723, "approximate</w>": 1724, "posterior</w>": 1725, "Edwards</w>": 1726, "Storkey</w>": 1727, "considering</w>": 1728, "whose</w>": 1729, "KL</w>": 1730, "inferred</w>": 1731, "Like</w>": 1732, "summary</w>": 1733, "discriminative</w>": 1734, "befits</w>": 1735, "With</w>": 1736, "resembles</w>": 1737, "multimodal</w>": 1738, "Unlike</w>": 1739, "empirical</w>": 1740, "risk</w>": 1741, "Neither</w>": 1742, "nor</w>": 1743, "speed</w>": 1744, "regularize</w>": 1745, "Conclusion</w>": 1746, "specifically</w>": 1747, "far</w>": 1748, "sophisticated</w>": 1749, "although</w>": 1750, "modifying</w>": 1751, "direction</w>": 1752, "future</w>": 1753, "utilize</w>": 1754, "distributions</w>": 1755, "beyond</w>": 1756, "Gaussians</w>": 1757, "preliminary</w>": 1758, "explorations</w>": 1759, "variance</w>": 1760, "dimension</w>": 1761, "gains</w>": 1762, "flexibility</w>": 1763, "requiring</w>": 1764, "additional</w>": 1765, "fitted</w>": 1766, "Overall</w>": 1767, "simplicity</w>": 1768, "promising</w>": 1769}, "num_merges": 30522}