-
Notifications
You must be signed in to change notification settings - Fork 0
/
ssp_model.json
1 lines (1 loc) · 71.9 KB
/
ssp_model.json
1
{"vocab": {"How</w>": 0, "transferable</w>": 1, "are</w>": 2, "features</w>": 3, "in</w>": 4, "deep</w>": 5, "neural</w>": 6, "networks</w>": 7, "?</w>": 8, "Jason</w>": 9, "Yosinski</w>": 10, ",</w>": 11, "1</w>": 12, "Jeff</w>": 13, "Clune</w>": 14, "2</w>": 15, "Yoshua</w>": 16, "Bengio</w>": 17, "3</w>": 18, "and</w>": 19, "Hod</w>": 20, "Lipson4</w>": 21, "Dept</w>": 22, ".</w>": 23, "Computer</w>": 24, "Science</w>": 25, "Cornell</w>": 26, "University</w>": 27, "of</w>": 28, "Wyoming</w>": 29, "&</w>": 30, "Operations</w>": 31, "Research</w>": 32, "Montreal</w>": 33, "4</w>": 34, "Mechanical</w>": 35, "Aerospace</w>": 36, "Engineering</w>": 37, "Abstract</w>": 38, "Many</w>": 39, "trained</w>": 40, "on</w>": 41, "natural</w>": 42, "images</w>": 43, "exhibit</w>": 44, "a</w>": 45, "curious</w>": 46, "phenomenon</w>": 47, "common</w>": 48, ":</w>": 49, "the</w>": 50, "first</w>": 51, "layer</w>": 52, "they</w>": 53, "learn</w>": 54, "similar</w>": 55, "to</w>": 56, "Gabor</w>": 57, "filters</w>": 58, "color</w>": 59, "blobs</w>": 60, "Such</w>": 61, "-</w>": 62, "appear</w>": 63, "not</w>": 64, "be</w>": 65, "specific</w>": 66, "particular</w>": 67, "dataset</w>": 68, "or</w>": 69, "task</w>": 70, "but</w>": 71, "general</w>": 72, "that</w>": 73, "applicable</w>": 74, "many</w>": 75, "datasets</w>": 76, "tasks</w>": 77, "Features</w>": 78, "must</w>": 79, "eventually</w>": 80, "transition</w>": 81, "from</w>": 82, "by</w>": 83, "last</w>": 84, "network</w>": 85, "this</w>": 86, "has</w>": 87, "been</w>": 88, "studied</w>": 89, "extensively</w>": 90, "In</w>": 91, "paper</w>": 92, "we</w>": 93, "experimentally</w>": 94, "quantify</w>": 95, "generality</w>": 96, "versus</w>": 97, "specificity</w>": 98, "neurons</w>": 99, "each</w>": 100, "convolutional</w>": 101, "report</w>": 102, "few</w>": 103, "surprising</w>": 104, "results</w>": 105, "Transferability</w>": 106, "is</w>": 107, "negatively</w>": 108, "affected</w>": 109, "two</w>": 110, "distinct</w>": 111, "issues</w>": 112, "(</w>": 113, ")</w>": 114, "specialization</w>": 115, "higher</w>": 116, "their</w>": 117, "original</w>": 118, "at</w>": 119, "expense</w>": 120, "performance</w>": 121, "target</w>": 122, "which</w>": 123, "was</w>": 124, "expected</w>": 125, "optimization</w>": 126, "difficulties</w>": 127, "related</w>": 128, "splitting</w>": 129, "between</w>": 130, "co</w>": 131, "adapted</w>": 132, "an</w>": 133, "example</w>": 134, "ImageNet</w>": 135, "demonstrate</w>": 136, "either</w>": 137, "these</w>": 138, "may</w>": 139, "dominate</w>": 140, "depending</w>": 141, "whether</w>": 142, "transferred</w>": 143, "bottom</w>": 144, "middle</w>": 145, "top</w>": 146, "We</w>": 147, "also</w>": 148, "document</w>": 149, "transferability</w>": 150, "decreases</w>": 151, "as</w>": 152, "distance</w>": 153, "base</w>": 154, "increases</w>": 155, "transferring</w>": 156, "even</w>": 157, "distant</w>": 158, "can</w>": 159, "better</w>": 160, "than</w>": 161, "using</w>": 162, "random</w>": 163, "A</w>": 164, "final</w>": 165, "result</w>": 166, "initializing</w>": 167, "with</w>": 168, "almost</w>": 169, "any</w>": 170, "number</w>": 171, "layers</w>": 172, "produce</w>": 173, "boost</w>": 174, "generalization</w>": 175, "lingers</w>": 176, "after</w>": 177, "fine</w>": 178, "tuning</w>": 179, "Introduction</w>": 180, "Modern</w>": 181, "when</w>": 182, "all</w>": 183, "tend</w>": 184, "resemble</w>": 185, "The</w>": 186, "appearance</w>": 187, "so</w>": 188, "obtaining</w>": 189, "anything</w>": 190, "else</w>": 191, "image</w>": 192, "causes</w>": 193, "suspicion</w>": 194, "poorly</w>": 195, "chosen</w>": 196, "hyperparameters</w>": 197, "software</w>": 198, "bug</w>": 199, "This</w>": 200, "occurs</w>": 201, "only</w>": 202, "for</w>": 203, "different</w>": 204, "very</w>": 205, "training</w>": 206, "objectives</w>": 207, "including</w>": 208, "supervised</w>": 209, "classification</w>": 210, "Krizhevsky</w>": 211, "et</w>": 212, "al</w>": 213, "2012</w>": 214, "unsupervised</w>": 215, "density</w>": 216, "learning</w>": 217, "Lee</w>": 218, "2009</w>": 219, "sparse</w>": 220, "representations</w>": 221, "Le</w>": 222, "2011</w>": 223, "Because</w>": 224, "finding</w>": 225, "standard</w>": 226, "seems</w>": 227, "occur</w>": 228, "regardless</w>": 229, "exact</w>": 230, "cost</w>": 231, "function</w>": 232, "call</w>": 233, "On</w>": 234, "other</w>": 235, "hand</w>": 236, "know</w>": 237, "computed</w>": 238, "depend</w>": 239, "greatly</w>": 240, "For</w>": 241, "N</w>": 242, "dimensional</w>": 243, "softmax</w>": 244, "output</w>": 245, "successfully</w>": 246, "toward</w>": 247, "objective</w>": 248, "unit</w>": 249, "will</w>": 250, "class</w>": 251, "thus</w>": 252, "These</w>": 253, "intuitive</w>": 254, "notions</w>": 255, "provide</w>": 256, "more</w>": 257, "rigorous</w>": 258, "definitions</w>": 259, "below</w>": 260, "If</w>": 261, "arXiv</w>": 262, "1411</w>": 263, "1792v1</w>": 264, "[</w>": 265, "cs</w>": 266, "LG</w>": 267, "]</w>": 268, "6</w>": 269, "Nov</w>": 270, "2014</w>": 271, "then</w>": 272, "there</w>": 273, "somewhere</w>": 274, "observation</w>": 275, "raises</w>": 276, "questions</w>": 277, "\u2022</w>": 278, "Can</w>": 279, "degree</w>": 280, "Does</w>": 281, "suddenly</w>": 282, "single</w>": 283, "it</w>": 284, "spread</w>": 285, "out</w>": 286, "over</w>": 287, "several</w>": 288, "Where</w>": 289, "does</w>": 290, "take</w>": 291, "place</w>": 292, "near</w>": 293, "interested</w>": 294, "answers</w>": 295, "because</w>": 296, "extent</w>": 297, "within</w>": 298, "able</w>": 299, "use</w>": 300, "them</w>": 301, "transfer</w>": 302, "Caruana</w>": 303, "1995</w>": 304, ";</w>": 305, "train</w>": 306, "repurpose</w>": 307, "learned</w>": 308, "second</w>": 309, "process</w>": 310, "work</w>": 311, "if</w>": 312, "meaning</w>": 313, "suitable</w>": 314, "both</w>": 315, "instead</w>": 316, "When</w>": 317, "significantly</w>": 318, "smaller</w>": 319, "powerful</w>": 320, "tool</w>": 321, "enable</w>": 322, "large</w>": 323, "without</w>": 324, "overfitting</w>": 325, "Recent</w>": 326, "studies</w>": 327, "have</w>": 328, "taken</w>": 329, "advantage</w>": 330, "fact</w>": 331, "obtain</w>": 332, "state</w>": 333, "art</w>": 334, "Donahue</w>": 335, "2013a</w>": 336, "Zeiler</w>": 337, "Fergus</w>": 338, "2013</w>": 339, "Sermanet</w>": 340, "collectively</w>": 341, "suggesting</w>": 342, "do</w>": 343, "indeed</w>": 344, "compute</w>": 345, "fairly</w>": 346, "further</w>": 347, "emphasize</w>": 348, "importance</w>": 349, "studying</w>": 350, "nature</w>": 351, "usual</w>": 352, "approach</w>": 353, "copy</w>": 354, "its</w>": 355, "n</w>": 356, "remaining</w>": 357, "randomly</w>": 358, "initialized</w>": 359, "One</w>": 360, "choose</w>": 361, "backpropagate</w>": 362, "errors</w>": 363, "new</w>": 364, "into</w>": 365, "copied</w>": 366, "tune</w>": 367, "feature</w>": 368, "left</w>": 369, "frozen</w>": 370, "change</w>": 371, "during</w>": 372, "choice</w>": 373, "depends</w>": 374, "size</w>": 375, "parameters</w>": 376, "small</w>": 377, "often</w>": 378, "problem</w>": 379, "tuned</w>": 380, "improve</w>": 381, "Of</w>": 382, "course</w>": 383, "would</w>": 384, "little</w>": 385, "need</w>": 386, "lower</w>": 387, "level</w>": 388, "could</w>": 389, "just</w>": 390, "scratch</w>": 391, "compare</w>": 392, "techniques</w>": 393, "\u2014</w>": 394, "following</w>": 395, "sections</w>": 396, "make</w>": 397, "contributions</w>": 398, "define</w>": 399, "way</w>": 400, "namely</w>": 401, "how</w>": 402, "well</w>": 403, "one</w>": 404, "another</w>": 405, "Section</w>": 406, "pairs</w>": 407, "characterize</w>": 408, "yields</w>": 409, "four</w>": 410, "show</w>": 411, "separate</w>": 412, "cause</w>": 413, "degradation</w>": 414, "i</w>": 415, "themselves</w>": 416, "ii</w>": 417, "due</w>": 418, "neighboring</w>": 419, "effects</w>": 420, "benefits</w>": 421, "dissimilar</w>": 422, "relatively</w>": 423, "find</w>": 424, "previously</w>": 425, "reported</w>": 426, "Jarrett</w>": 427, "weights</w>": 428, "vs</w>": 429, "perform</w>": 430, "5</w>": 431, "Finally</w>": 432, "particularly</w>": 433, "effect</w>": 434, "having</w>": 435, "seen</w>": 436, "persists</w>": 437, "extensive</w>": 438, "Generality</w>": 439, "Specificity</w>": 440, "Measured</w>": 441, "Transfer</w>": 442, "Performance</w>": 443, "noted</w>": 444, "tendency</w>": 445, "up</w>": 446, "study</w>": 447, "set</w>": 448, "used</w>": 449, "B</w>": 450, "It</w>": 451, "important</w>": 452, "note</w>": 453, "definition</w>": 454, "similarity</w>": 455, "create</w>": 456, "constructing</w>": 457, "non</w>": 458, "overlapping</w>": 459, "subsets</w>": 460, "To</w>": 461, "split</w>": 462, "1000</w>": 463, "classes</w>": 464, "groups</w>": 465, "containing</w>": 466, "500</w>": 467, "approximately</w>": 468, "half</w>": 469, "data</w>": 470, "about</w>": 471, "645</w>": 472, "000</w>": 473, "examples</w>": 474, "eight</w>": 475, "baseA</w>": 476, "baseB</w>": 477, "shown</w>": 478, "rows</w>": 479, "Figure</w>": 480, "{</w>": 481, "7</w>": 482, "}</w>": 483, "explanation</w>": 484, "=</w>": 485, "First</w>": 486, "selffer</w>": 487, "B3B</w>": 488, "five</w>": 489, "\u2013</w>": 490, "8</w>": 491, "control</w>": 492, "next</w>": 493, "row</w>": 494, "A3B</w>": 495, "Intuitively</w>": 496, "here</w>": 497, "classify</w>": 498, "performs</w>": 499, "evidence</w>": 500, "third</w>": 501, "least</w>": 502, "respect</w>": 503, "suffers</w>": 504, "repeated</w>": 505, "directions</w>": 506, "e</w>": 507, "AnB</w>": 508, "BnA</w>": 509, "above</w>": 510, "versions</w>": 511, "where</w>": 512, "+</w>": 513, "like</w>": 514, "assign</w>": 515, "contains</w>": 516, "clusters</w>": 517, "dogs</w>": 518, "cats</w>": 519, "13</w>": 520, "biological</w>": 521, "family</w>": 522, "Felidae</w>": 523, "tabby</w>": 524, "cat</w>": 525, "tiger</w>": 526, "Persian</w>": 527, "Siamese</w>": 528, "Egyptian</w>": 529, "mountain</w>": 530, "lion</w>": 531, "lynx</w>": 532, "leopard</w>": 533, "snow</w>": 534, "jaguar</w>": 535, "cheetah</w>": 536, "average</w>": 537, "contain</w>": 538, "felid</w>": 539, "levels</w>": 540, "help</w>": 541, "some</w>": 542, "types</w>": 543, "felids</w>": 544, "generalizing</w>": 545, "expect</w>": 546, "high</w>": 547, "detectors</w>": 548, "old</w>": 549, "low</w>": 550, "Thus</w>": 551, "created</w>": 552, "assigning</w>": 553, "less</w>": 554, "Fortunately</w>": 555, "provided</w>": 556, "hierarchy</w>": 557, "parent</w>": 558, "information</w>": 559, "allowed</w>": 560, "us</w>": 561, "special</w>": 562, "halves</w>": 563, "semantically</w>": 564, "possible</w>": 565, "man</w>": 566, "made</w>": 567, "entities</w>": 568, "quite</w>": 569, "551</w>": 570, "group</w>": 571, "449</w>": 572, "Further</w>": 573, "details</w>": 574, "given</w>": 575, "supplementary</w>": 576, "material</w>": 577, "1The</w>": 578, "released</w>": 579, "Large</w>": 580, "Scale</w>": 581, "Visual</w>": 582, "Recognition</w>": 583, "Challenge</w>": 584, "ILSVRC2012</w>": 585, "Deng</w>": 586, "281</w>": 587, "167</w>": 588, "labeled</w>": 589, "50</w>": 590, "test</w>": 591, "2Note</w>": 592, "doesn</w>": 593, "\u2019</w>": 594, "t</w>": 595, "sense</w>": 596, "case</w>": 597, "B8B</w>": 598, "A8B</w>": 599, "never</w>": 600, "input</w>": 601, "labels</w>": 602, "WA1</w>": 603, "WA2</w>": 604, "WA3</w>": 605, "WA4</w>": 606, "WA5</w>": 607, "WA6</w>": 608, "WA7</w>": 609, "WA8</w>": 610, "WB1</w>": 611, "WB2</w>": 612, "WB3</w>": 613, "WB4</w>": 614, "WB5</w>": 615, "WB6</w>": 616, "WB7</w>": 617, "WB8</w>": 618, "Overview</w>": 619, "experimental</w>": 620, "treatments</w>": 621, "controls</w>": 622, "Top</w>": 623, "backprop</w>": 624, "rectangles</w>": 625, "g</w>": 626, "represent</w>": 627, "weight</w>": 628, "vector</w>": 629, "indicating</w>": 630, "originally</w>": 631, "vertical</w>": 632, "ellipsoidal</w>": 633, "bars</w>": 634, "vectors</w>": 635, "activations</w>": 636, "Third</w>": 637, "upper</w>": 638, "\u2212</w>": 639, "entire</w>": 640, "same</w>": 641, "locked</w>": 642, "\u201c</w>": 643, "\u201d</w>": 644, "treatment</w>": 645, "reveals</w>": 646, "occurrence</w>": 647, "fragile</w>": 648, "coadaptation</w>": 649, "adapt</w>": 650, "such</w>": 651, "cannot</w>": 652, "rediscovered</w>": 653, "Fourth</w>": 654, "except</w>": 655, "tests</w>": 656, "Experimental</w>": 657, "Setup</w>": 658, "Since</w>": 659, "won</w>": 660, "competition</w>": 661, "much</w>": 662, "interest</w>": 663, "tweaking</w>": 664, "models</w>": 665, "However</w>": 666, "aim</w>": 667, "maximize</w>": 668, "absolute</w>": 669, "rather</w>": 670, "known</w>": 671, "architecture</w>": 672, "reference</w>": 673, "implementation</w>": 674, "Caffe</w>": 675, "Jia</w>": 676, "our</w>": 677, "comparable</w>": 678, "extensible</w>": 679, "useful</w>": 680, "researchers</w>": 681, "setup</w>": 682, "rates</w>": 683, "etc</w>": 684, "code</w>": 685, "parameter</w>": 686, "files</w>": 687, "reproduce</w>": 688, "experiments</w>": 689, "available</w>": 690, "http</w>": 691, "/</w>": 692, "yosinski</w>": 693, "com</w>": 694, "Results</w>": 695, "Discussion</w>": 696, "performed</w>": 697, "three</w>": 698, "sets</w>": 699, "main</w>": 700, "experiment</w>": 701, "splits</w>": 702, "discussed</w>": 703, "presents</w>": 704, "describes</w>": 705, "0</w>": 706, "52</w>": 707, "54</w>": 708, "56</w>": 709, "58</w>": 710, "60</w>": 711, "62</w>": 712, "64</w>": 713, "66</w>": 714, "accuracy</w>": 715, "BnB</w>": 716, "Layer</w>": 717, "chopped</w>": 718, "retrained</w>": 719, "improves</w>": 720, "Fine</w>": 721, "recovers</w>": 722, "interactions</w>": 723, "drops</w>": 724, "adaptation</w>": 725, "representation</w>": 726, "s</w>": 727, "Each</w>": 728, "marker</w>": 729, "figure</w>": 730, "represents</w>": 731, "validation</w>": 732, "white</w>": 733, "circles</w>": 734, "There</w>": 735, "points</w>": 736, "tested</w>": 737, "dark</w>": 738, "blue</w>": 739, "dot</w>": 740, "Light</w>": 741, "Dark</w>": 742, "red</w>": 743, "diamonds</w>": 744, "light</w>": 745, "Points</w>": 746, "shifted</w>": 747, "slightly</w>": 748, "right</w>": 749, "visual</w>": 750, "clarity</w>": 751, "Bottom</w>": 752, "Lines</w>": 753, "connecting</w>": 754, "means</w>": 755, "Numbered</w>": 756, "descriptions</w>": 757, "line</w>": 758, "refer</w>": 759, "interpretation</w>": 760, "applies</w>": 761, "Similar</w>": 762, "Datasets</w>": 763, "Random</w>": 764, "shown3</w>": 765, "yield</w>": 766, "conclusions</w>": 767, "interpretations</w>": 768, "dotted</w>": 769, "3AnA</w>": 770, "statistically</w>": 771, "equivalent</w>": 772, "cases</w>": 773, "simplify</w>": 774, "notation</w>": 775, "label</w>": 776, "Similarly</w>": 777, "aggregated</w>": 778, "identical</w>": 779, "subset</w>": 780, "attains</w>": 781, "625</w>": 782, "37</w>": 783, "%</w>": 784, "error</w>": 785, "42</w>": 786, "attained</w>": 787, "While</w>": 788, "might</w>": 789, "lead</w>": 790, "net</w>": 791, "ways</w>": 792, "mistakes</w>": 793, "behavior</w>": 794, "As</w>": 795, "That</w>": 796, "save</w>": 797, "reinitialize</w>": 798, "whole</w>": 799, "retrain</w>": 800, "holds</w>": 801, "true</w>": 802, "worse</w>": 803, "drop</w>": 804, "contained</w>": 805, "successive</w>": 806, "interact</w>": 807, "complex</w>": 808, "relearned</w>": 809, "alone</w>": 810, "Gradient</w>": 811, "descent</w>": 812, "good</w>": 813, "solution</w>": 814, "time</w>": 815, "were</w>": 816, "jointly</w>": 817, "By</w>": 818, "nearly</w>": 819, "back</w>": 820, "get</w>": 821, "closer</w>": 822, "relearn</w>": 823, "apparently</w>": 824, "relearning</w>": 825, "simple</w>": 826, "enough</w>": 827, "gradient</w>": 828, "Alternately</w>": 829, "say</w>": 830, "previous</w>": 831, "knowledge</w>": 832, "observed</w>": 833, "literature</w>": 834, "prevents</w>": 835, "measure</w>": 836, "Layers</w>": 837, "perfectly</w>": 838, "giving</w>": 839, "blob</w>": 840, "shows</w>": 841, "slight</w>": 842, "significant</w>": 843, "Thanks</w>": 844, "tell</w>": 845, "combination</w>": 846, "lost</w>": 847, "dominates</w>": 848, "whereas</w>": 849, "diminishes</w>": 850, "Although</w>": 851, "successful</w>": 852, "elsewhere</w>": 853, "Girshick</w>": 854, "2013b</w>": 855, "limited</w>": 856, "noticing</w>": 857, "alternative</w>": 858, "strictly</w>": 859, "believe</w>": 860, "carefully</w>": 861, "quantified</w>": 862, "decoupled</w>": 863, "showing</w>": 864, "part</w>": 865, "regime</w>": 866, "generalize</w>": 867, "those</w>": 868, "directly</w>": 869, "Previously</w>": 870, "reason</w>": 871, "want</w>": 872, "suggests</w>": 873, "Note</w>": 874, "should</w>": 875, "attributed</w>": 876, "longer</w>": 877, "total</w>": 878, "450k</w>": 879, "iterations</w>": 880, "finetuned</w>": 881, "length</w>": 882, "improvement</w>": 883, "plausible</w>": 884, "beginning</w>": 885, "completely</w>": 886, "still</w>": 887, "linger</w>": 888, "boosting</w>": 889, "through</w>": 890, "retraining</w>": 891, "keep</w>": 892, "initialize</w>": 893, "keeping</w>": 894, "anywhere</w>": 895, "seven</w>": 896, "produces</w>": 897, "improved</w>": 898, "across</w>": 899, "Table</w>": 900, "4We</w>": 901, "aggregate</w>": 902, "point</w>": 903, "computationally</w>": 904, "expensive</w>": 905, "9</w>": 906, "days</w>": 907, "GPU</w>": 908, "publication</w>": 909, "per</w>": 910, "aggregation</w>": 911, "averaged</w>": 912, "ranges</w>": 913, "mean</w>": 914, "Dissimilar</w>": 915, "Splitting</w>": 916, "Man</w>": 917, "Natural</w>": 918, "Classes</w>": 919, "Into</w>": 920, "Separate</w>": 921, "mentioned</w>": 922, "effectiveness</w>": 923, "decline</w>": 924, "become</w>": 925, "hypothesis</w>": 926, "comparing</w>": 927, "object</w>": 928, "creates</w>": 929, "subplot</w>": 930, "orange</w>": 931, "hexagons</w>": 932, "join</w>": 933, "lines</w>": 934, "categories</w>": 935, "simply</w>": 936, "being</w>": 937, "easier</w>": 938, "Weights</w>": 939, "untrained</w>": 940, "showed</w>": 941, "strikingly</w>": 942, "rectification</w>": 943, "pooling</w>": 944, "local</w>": 945, "normalization</w>": 946, "They</w>": 947, "Caltech</w>": 948, "101</w>": 949, "Fei</w>": 950, "2004</w>": 951, "ask</w>": 952, "optimal</w>": 953, "carries</w>": 954, "deeper</w>": 955, "larger</w>": 956, "obtained</w>": 957, "various</w>": 958, "choices</w>": 959, "falls</w>": 960, "off</w>": 961, "quickly</w>": 962, "chance</w>": 963, "getting</w>": 964, "straightforward</w>": 965, "comparison</w>": 966, "Whereas</w>": 967, "max</w>": 968, "did</w>": 969, "nonlinearity</w>": 970, "relu</w>": 971, "x</w>": 972, "abs</w>": 973, "tanh</w>": 974, "sizes</w>": 975, "differences</w>": 976, "Additionally</w>": 977, "considered</w>": 978, "hyperparameter</w>": 979, "architectural</w>": 980, "datapoint</w>": 981, "tweak</w>": 982, "initialization</w>": 983, "subtracting</w>": 984, "individual</w>": 985, "normalized</w>": 986, "performances</w>": 987, "plotted</w>": 988, "makes</w>": 989, "things</w>": 990, "apparent</w>": 991, "gap</w>": 992, "grows</w>": 993, "25</w>": 994, "Second</w>": 995, "latter</w>": 996, "differ</w>": 997, "fully</w>": 998, "ours</w>": 999, "informative</w>": 1000, "however</w>": 1001, "based</w>": 1002, "draws</w>": 1003, "multiple</w>": 1004, "runs</w>": 1005, "loss</w>": 1006, "failed</w>": 1007, "converge</w>": 1008, "producing</w>": 1009, "Much</w>": 1010, "convergence</w>": 1011, "30</w>": 1012, "20</w>": 1013, "15</w>": 1014, "10</w>": 1015, "05</w>": 1016, "00</w>": 1017, "Relative</w>": 1018, "m</w>": 1019, "Degradation</w>": 1020, "vice</w>": 1021, "versa</w>": 1022, "connects</w>": 1023, "consist</w>": 1024, "plots</w>": 1025, "compared</w>": 1026, "making</w>": 1027, "extra</w>": 1028, "overfit</w>": 1029, "Conclusions</w>": 1030, "demonstrated</w>": 1031, "method</w>": 1032, "quantifying</w>": 1033, "fragilely</w>": 1034, "found</w>": 1035, "substantial</w>": 1036, "generally</w>": 1037, "technique</w>": 1038, "improving</w>": 1039, "Prototypical</w>": 1040, "Networks</w>": 1041, "Few</w>": 1042, "shot</w>": 1043, "Learning</w>": 1044, "Jake</w>": 1045, "Snell</w>": 1046, "Toronto</w>": 1047, "\u2217</w>": 1048, "Kevin</w>": 1049, "Swersky</w>": 1050, "Twitter</w>": 1051, "Richard</w>": 1052, "S</w>": 1053, "Zemel</w>": 1054, "Vector</w>": 1055, "Institute</w>": 1056, "propose</w>": 1057, "prototypical</w>": 1058, "classifier</w>": 1059, "metric</w>": 1060, "space</w>": 1061, "computing</w>": 1062, "distances</w>": 1063, "prototype</w>": 1064, "Compared</w>": 1065, "recent</w>": 1066, "approaches</w>": 1067, "reflect</w>": 1068, "simpler</w>": 1069, "inductive</w>": 1070, "bias</w>": 1071, "beneficial</w>": 1072, "achieve</w>": 1073, "excellent</w>": 1074, "analysis</w>": 1075, "design</w>": 1076, "decisions</w>": 1077, "improvements</w>": 1078, "involving</w>": 1079, "complicated</w>": 1080, "meta</w>": 1081, "extend</w>": 1082, "zero</w>": 1083, "theart</w>": 1084, "CU</w>": 1085, "Birds</w>": 1086, "16</w>": 1087, "accommodate</w>": 1088, "naive</w>": 1089, "re</w>": 1090, "model</w>": 1091, "severely</w>": 1092, "difficult</w>": 1093, "humans</w>": 1094, "ability</w>": 1095, "Two</w>": 1096, "progress</w>": 1097, "Vinyals</w>": 1098, "29</w>": 1099, "proposed</w>": 1100, "matching</w>": 1101, "uses</w>": 1102, "attention</w>": 1103, "mechanism</w>": 1104, "embedding</w>": 1105, "support</w>": 1106, "predict</w>": 1107, "unlabeled</w>": 1108, "query</w>": 1109, "Matching</w>": 1110, "interpreted</w>": 1111, "weighted</w>": 1112, "nearest</w>": 1113, "neighbor</w>": 1114, "applied</w>": 1115, "Notably</w>": 1116, "utilizes</w>": 1117, "sampled</w>": 1118, "mini</w>": 1119, "batches</w>": 1120, "called</w>": 1121, "episodes</w>": 1122, "episode</w>": 1123, "designed</w>": 1124, "mimic</w>": 1125, "subsampling</w>": 1126, "faithful</w>": 1127, "environment</w>": 1128, "thereby</w>": 1129, "Ravi</w>": 1130, "Larochelle</w>": 1131, "22</w>": 1132, "episodic</w>": 1133, "idea</w>": 1134, "Their</w>": 1135, "involves</w>": 1136, "LSTM</w>": 1137, "updates</w>": 1138, "Here</w>": 1139, "learner</w>": 1140, "learns</w>": 1141, "custom</w>": 1142, "attack</w>": 1143, "addressing</w>": 1144, "key</w>": 1145, "issue</w>": 1146, "under</w>": 1147, "assumption</w>": 1148, "Our</w>": 1149, "exists</w>": 1150, "cluster</w>": 1151, "around</w>": 1152, "order</w>": 1153, "linear</w>": 1154, "mapping</w>": 1155, "Classification</w>": 1156, "embedded</w>": 1157, "follow</w>": 1158, "tackle</w>": 1159, "comes</w>": 1160, "description</w>": 1161, "therefore</w>": 1162, "shared</w>": 1163, "serve</w>": 1164, "*</w>": 1165, "Initial</w>": 1166, "author</w>": 1167, "done</w>": 1168, "while</w>": 1169, "1703</w>": 1170, "05175v2</w>": 1171, "19</w>": 1172, "Jun</w>": 1173, "2017</w>": 1174, "c1</w>": 1175, "c2</w>": 1176, "c3</w>": 1177, "v1</w>": 1178, "v2</w>": 1179, "v3</w>": 1180, "b</w>": 1181, "Zero</w>": 1182, "scenarios</w>": 1183, "Left</w>": 1184, "prototypes</w>": 1185, "ck</w>": 1186, "Right</w>": 1187, "produced</w>": 1188, "vk</w>": 1189, "classified</w>": 1190, "via</w>": 1191, "p\u03c6</w>": 1192, "y</w>": 1193, "k</w>": 1194, "|</w>": 1195, "\u221d</w>": 1196, "exp</w>": 1197, "d</w>": 1198, "f\u03c6</w>": 1199, "scenario</w>": 1200, "formulate</w>": 1201, "settings</w>": 1202, "draw</w>": 1203, "connections</w>": 1204, "setting</w>": 1205, "analyze</w>": 1206, "underlying</w>": 1207, "relate</w>": 1208, "clustering</w>": 1209, "justify</w>": 1210, "Bregman</w>": 1211, "divergence</w>": 1212, "squared</w>": 1213, "Euclidean</w>": 1214, "empirically</w>": 1215, "vital</w>": 1216, "outperforms</w>": 1217, "commonly</w>": 1218, "cosine</w>": 1219, "benchmark</w>": 1220, "efficient</w>": 1221, "algorithms</w>": 1222, "appealing</w>": 1223, "Notation</w>": 1224, "x1</w>": 1225, "y1</w>": 1226, "xN</w>": 1227, "yN</w>": 1228, "xi</w>": 1229, "\u2208</w>": 1230, "R</w>": 1231, "D</w>": 1232, "yi</w>": 1233, "K</w>": 1234, "corresponding</w>": 1235, "Sk</w>": 1236, "denotes</w>": 1237, "Model</w>": 1238, "M</w>": 1239, "RM</w>": 1240, "\u2192</w>": 1241, "learnable</w>": 1242, "\u03c6</w>": 1243, "belonging</w>": 1244, "X</w>": 1245, "Given</w>": 1246, "\u00d7</w>": 1247, "\u221e</w>": 1248, "distribution</w>": 1249, "P</w>": 1250, "k0</w>": 1251, "ck0</w>": 1252, "proceeds</w>": 1253, "minimizing</w>": 1254, "negative</w>": 1255, "log</w>": 1256, "probability</w>": 1257, "J</w>": 1258, "SGD</w>": 1259, "Training</w>": 1260, "formed</w>": 1261, "selecting</w>": 1262, "choosing</w>": 1263, "act</w>": 1264, "remainder</w>": 1265, "Pseudocode</w>": 1266, "Algorithm</w>": 1267, "computation</w>": 1268, "NC</w>": 1269, "\u2264</w>": 1270, "NS</w>": 1271, "NQ</w>": 1272, "RANDOMSAMPLE</w>": 1273, "elements</w>": 1274, "uniformly</w>": 1275, "replacement</w>": 1276, "Input</w>": 1277, "Dk</w>": 1278, "Output</w>": 1279, "generated</w>": 1280, "V</w>": 1281, "\u2190</w>": 1282, "Select</w>": 1283, "indices</w>": 1284, "DVk</w>": 1285, "Qk</w>": 1286, "\\</w>": 1287, "Compute</w>": 1288, "end</w>": 1289, "Initialize</w>": 1290, "\"</w>": 1291, "logX</w>": 1292, "#</w>": 1293, "Update</w>": 1294, "Mixture</w>": 1295, "Density</w>": 1296, "Estimation</w>": 1297, "functions</w>": 1298, "regular</w>": 1299, "divergences</w>": 1300, "algorithm</w>": 1301, "performing</w>": 1302, "mixture</w>": 1303, "estimation</w>": 1304, "exponential</w>": 1305, "d\u03d5</w>": 1306, "defined</w>": 1307, "\u03d5</w>": 1308, "differentiable</w>": 1309, "convex</w>": 1310, "Legendre</w>": 1311, "type</w>": 1312, "Examples</w>": 1313, "include</w>": 1314, "kz</w>": 1315, "z</w>": 1316, "0k</w>": 1317, "Mahalanobis</w>": 1318, "Prototype</w>": 1319, "viewed</w>": 1320, "terms</w>": 1321, "hard</w>": 1322, "assigned</w>": 1323, "representative</w>": 1324, "achieving</w>": 1325, "minimal</w>": 1326, "Equation</w>": 1327, "representatives</w>": 1328, "Moreover</w>": 1329, "p\u03c8</w>": 1330, "\u03b8</w>": 1331, "cumulant</w>": 1332, "\u03c8</w>": 1333, "written</w>": 1334, "uniquely</w>": 1335, "determined</w>": 1336, "equally</w>": 1337, "assignment</w>": 1338, "inference</w>": 1339, "prediction</w>": 1340, "\u00b5</w>": 1341, "\u03b8k</w>": 1342, "effectively</w>": 1343, "specifies</w>": 1344, "modeling</w>": 1345, "assumptions</w>": 1346, "classconditional</w>": 1347, "Reinterpretation</w>": 1348, "Linear</w>": 1349, "gaining</w>": 1350, "insight</w>": 1351, "parameterization</w>": 1352, "term</w>": 1353, "constant</w>": 1354, "affect</w>": 1355, "probabilities</w>": 1356, "focus</w>": 1357, "primarily</w>": 1358, "spherical</w>": 1359, "Gaussian</w>": 1360, "densities</w>": 1361, "indicate</w>": 1362, "effective</w>": 1363, "despite</w>": 1364, "equivalence</w>": 1365, "hypothesize</w>": 1366, "required</w>": 1367, "linearity</w>": 1368, "Indeed</w>": 1369, "modern</w>": 1370, "systems</w>": 1371, "currently</w>": 1372, "14</w>": 1373, "28</w>": 1374, "Comparison</w>": 1375, "xk</w>": 1376, "since</w>": 1377, "question</w>": 1378, "fixed</w>": 1379, "greater</w>": 1380, "require</w>": 1381, "partitioning</w>": 1382, "scheme</w>": 1383, "Mensink</w>": 1384, "Rippel</w>": 1385, "methods</w>": 1386, "phase</w>": 1387, "ordinary</w>": 1388, "extensions</w>": 1389, "decoupling</w>": 1390, "conditional</w>": 1391, "FCE</w>": 1392, "takes</w>": 1393, "account</w>": 1394, "likewise</w>": 1395, "incorporated</w>": 1396, "increase</w>": 1397, "imposes</w>": 1398, "arbitrary</w>": 1399, "ordering</w>": 1400, "bi</w>": 1401, "directional</w>": 1402, "Instead</w>": 1403, "outline</w>": 1404, "Design</w>": 1405, "Choices</w>": 1406, "Distance</w>": 1407, "apply</w>": 1408, "permissible</w>": 1409, "conjecture</w>": 1410, "hold</w>": 1411, "Episode</w>": 1412, "composition</w>": 1413, "construct</w>": 1414, "Nc</w>": 1415, "match</w>": 1416, "situation</w>": 1417, "comprised</w>": 1418, "extremely</w>": 1419, "held</w>": 1420, "Another</w>": 1421, "consideration</w>": 1422, "usually</w>": 1423, "best</w>": 1424, "Shot</w>": 1425, "differs</w>": 1426, "accuracies</w>": 1427, "Omniglot</w>": 1428, "Acc</w>": 1429, "Dist</w>": 1430, "Tune</w>": 1431, "MATCHING</w>": 1432, "NETWORKS</w>": 1433, "Cosine</w>": 1434, "98</w>": 1435, "93</w>": 1436, "Y</w>": 1437, "97</w>": 1438, "NEURAL</w>": 1439, "STATISTICIAN</w>": 1440, "99</w>": 1441, "PROTOTYPICAL</w>": 1442, "OURS</w>": 1443, "Euclid</w>": 1444, "96</w>": 1445, "advance</w>": 1446, "raw</w>": 1447, "text</w>": 1448, "Modifying</w>": 1449, "deal</w>": 1450, "g\u03d1</w>": 1451, "An</w>": 1452, "illustration</w>": 1453, "procedure</w>": 1454, "relates</w>": 1455, "come</w>": 1456, "domains</w>": 1457, "helpful</w>": 1458, "fix</w>": 1459, "constrain</w>": 1460, "f</w>": 1461, "Experiments</w>": 1462, "miniImageNet</w>": 1463, "version</w>": 1464, "ILSVRC</w>": 1465, "26</w>": 1466, "UCSD</w>": 1467, "bird</w>": 1468, "CUB</w>": 1469, "200</w>": 1470, "31</w>": 1471, "1623</w>": 1472, "handwritten</w>": 1473, "characters</w>": 1474, "collected</w>": 1475, "alphabets</w>": 1476, "associated</w>": 1477, "character</w>": 1478, "drawn</w>": 1479, "human</w>": 1480, "subject</w>": 1481, "resizing</w>": 1482, "grayscale</w>": 1483, "augmenting</w>": 1484, "rotations</w>": 1485, "multiples</w>": 1486, "90</w>": 1487, "degrees</w>": 1488, "1200</w>": 1489, "plus</w>": 1490, "800</w>": 1491, "mirrors</w>": 1492, "composed</w>": 1493, "blocks</w>": 1494, "block</w>": 1495, "comprises</w>": 1496, "filter</w>": 1497, "convolution</w>": 1498, "batch</w>": 1499, "ReLU</w>": 1500, "encoder</w>": 1501, "All</w>": 1502, "Adam</w>": 1503, "11</w>": 1504, "initial</w>": 1505, "rate</w>": 1506, "cut</w>": 1507, "every</w>": 1508, "2000</w>": 1509, "No</w>": 1510, "regularization</w>": 1511, "advantageous</w>": 1512, "fewer</w>": 1513, "against</w>": 1514, "baselines</w>": 1515, "statistician</w>": 1516, "derived</w>": 1517, "12</w>": 1518, "84</w>": 1519, "divided</w>": 1520, "100</w>": 1521, "600</w>": 1522, "introduced</w>": 1523, "monitoring</w>": 1524, "though</w>": 1525, "1600</w>": 1526, "increased</w>": 1527, "95</w>": 1528, "confidence</w>": 1529, "intervals</w>": 1530, "BASELINE</w>": 1531, "NEAREST</w>": 1532, "NEIGHBORS</w>": 1533, "86</w>": 1534, "\u00b1</w>": 1535, "49</w>": 1536, "79</w>": 1537, "43</w>": 1538, "40</w>": 1539, "78</w>": 1540, "51</w>": 1541, "09</w>": 1542, "71</w>": 1543, "55</w>": 1544, "73</w>": 1545, "META</w>": 1546, "LEARNER</w>": 1547, "44</w>": 1548, "77</w>": 1549, "68</w>": 1550, "70</w>": 1551, "80</w>": 1552, "Accuracy</w>": 1553, "Proto</w>": 1554, "Nets</w>": 1555, "axis</w>": 1556, "indicates</w>": 1557, "configuration</w>": 1558, "Error</w>": 1559, "schedule</w>": 1560, "until</w>": 1561, "stops</w>": 1562, "variants</w>": 1563, "Meta</w>": 1564, "Learner</w>": 1565, "achieves</w>": 1566, "wide</w>": 1567, "margin</w>": 1568, "conducted</w>": 1569, "determine</w>": 1570, "own</w>": 1571, "difficulty</w>": 1572, "helps</w>": 1573, "forces</w>": 1574, "grained</w>": 1575, "Also</w>": 1576, "substantially</w>": 1577, "pronounced</w>": 1578, "naturally</w>": 1579, "suited</w>": 1580, "assess</w>": 1581, "suitability</w>": 1582, "run</w>": 1583, "788</w>": 1584, "species</w>": 1585, "closely</w>": 1586, "Reed</w>": 1587, "23</w>": 1588, "preparing</w>": 1589, "Image</w>": 1590, "ALE</w>": 1591, "Fisher</w>": 1592, "SJE</w>": 1593, "AlexNet</w>": 1594, "SAMPLE</w>": 1595, "CLUSTERING</w>": 1596, "17</w>": 1597, "GoogLeNet</w>": 1598, "DS</w>": 1599, "DA</w>": 1600, "PROTO</w>": 1601, "NETS</w>": 1602, "divide</w>": 1603, "024</w>": 1604, "extracted</w>": 1605, "applying</w>": 1606, "crops</w>": 1607, "horizontally</w>": 1608, "flipped</w>": 1609, "image2</w>": 1610, "At</w>": 1611, "crop</w>": 1612, "312</w>": 1613, "continuous</w>": 1614, "attribute</w>": 1615, "attributes</w>": 1616, "encode</w>": 1617, "characteristics</w>": 1618, "shape</w>": 1619, "feather</w>": 1620, "patterns</w>": 1621, "1024</w>": 1622, "normalize</w>": 1623, "domain</w>": 1624, "constructed</w>": 1625, "embeddings</w>": 1626, "optimized</w>": 1627, "decay</w>": 1628, "Early</w>": 1629, "stopping</w>": 1630, "epochs</w>": 1631, "utilizing</w>": 1632, "trains</w>": 1633, "SVM</w>": 1634, "relative</w>": 1635, "Related</w>": 1636, "Work</w>": 1637, "vast</w>": 1638, "summarize</w>": 1639, "most</w>": 1640, "relevant</w>": 1641, "Neighborhood</w>": 1642, "Components</w>": 1643, "Analysis</w>": 1644, "NCA</w>": 1645, "KNN</w>": 1646, "leave</w>": 1647, "transformed</w>": 1648, "Salakhutdinov</w>": 1649, "Hinton</w>": 1650, "27</w>": 1651, "transformation</w>": 1652, "LMNN</w>": 1653, "attempts</w>": 1654, "optimize</w>": 1655, "hinge</w>": 1656, "encourages</w>": 1657, "neighborhood</w>": 1658, "DNet</w>": 1659, "21</w>": 1660, "upon</w>": 1661, "extension</w>": 1662, "opposed</w>": 1663, "distinction</w>": 1664, "form</w>": 1665, "allows</w>": 1666, "concise</w>": 1667, "independent</w>": 1668, "obviates</w>": 1669, "store</w>": 1670, "predictions</w>": 1671, "represented</w>": 1672, "developed</w>": 1673, "rapidly</w>": 1674, "incorporate</w>": 1675, "relies</w>": 1676, "handle</w>": 1677, "downloaded</w>": 1678, "https</w>": 1679, "github</w>": 1680, "reedscot</w>": 1681, "cvpr2016</w>": 1682, "novel</w>": 1683, "contrast</w>": 1684, "linearly</w>": 1685, "embed</w>": 1686, "couple</w>": 1687, "attempt</w>": 1688, "allowing</w>": 1689, "pre</w>": 1690, "processing</w>": 1691, "step</w>": 1692, "multi</w>": 1693, "modal</w>": 1694, "variant</w>": 1695, "manner</w>": 1696, "no</w>": 1697, "requires</w>": 1698, "addition</w>": 1699, "generalizes</w>": 1700, "dynamics</w>": 1701, "itself</w>": 1702, "goal</w>": 1703, "forms</w>": 1704, "classifiers</w>": 1705, "dynamically</w>": 1706, "core</w>": 1707, "rely</w>": 1708, "nets</w>": 1709, "secondary</w>": 1710, "amount</w>": 1711, "generative</w>": 1712, "extends</w>": 1713, "variational</w>": 1714, "autoencoder</w>": 1715, "24</w>": 1716, "component</w>": 1717, "statistic</w>": 1718, "summarizes</w>": 1719, "encoding</w>": 1720, "taking</w>": 1721, "sample</w>": 1722, "post</w>": 1723, "approximate</w>": 1724, "posterior</w>": 1725, "Edwards</w>": 1726, "Storkey</w>": 1727, "considering</w>": 1728, "whose</w>": 1729, "KL</w>": 1730, "inferred</w>": 1731, "Like</w>": 1732, "summary</w>": 1733, "discriminative</w>": 1734, "befits</w>": 1735, "With</w>": 1736, "resembles</w>": 1737, "multimodal</w>": 1738, "Unlike</w>": 1739, "empirical</w>": 1740, "risk</w>": 1741, "Neither</w>": 1742, "nor</w>": 1743, "speed</w>": 1744, "regularize</w>": 1745, "Conclusion</w>": 1746, "specifically</w>": 1747, "far</w>": 1748, "sophisticated</w>": 1749, "although</w>": 1750, "modifying</w>": 1751, "direction</w>": 1752, "future</w>": 1753, "utilize</w>": 1754, "distributions</w>": 1755, "beyond</w>": 1756, "Gaussians</w>": 1757, "preliminary</w>": 1758, "explorations</w>": 1759, "variance</w>": 1760, "dimension</w>": 1761, "gains</w>": 1762, "flexibility</w>": 1763, "requiring</w>": 1764, "additional</w>": 1765, "fitted</w>": 1766, "Overall</w>": 1767, "simplicity</w>": 1768, "promising</w>": 1769}, "id_to_subword": {"0": "How</w>", "1": "transferable</w>", "2": "are</w>", "3": "features</w>", "4": "in</w>", "5": "deep</w>", "6": "neural</w>", "7": "networks</w>", "8": "?</w>", "9": "Jason</w>", "10": "Yosinski</w>", "11": ",</w>", "12": "1</w>", "13": "Jeff</w>", "14": "Clune</w>", "15": "2</w>", "16": "Yoshua</w>", "17": "Bengio</w>", "18": "3</w>", "19": "and</w>", "20": "Hod</w>", "21": "Lipson4</w>", "22": "Dept</w>", "23": ".</w>", "24": "Computer</w>", "25": "Science</w>", "26": "Cornell</w>", "27": "University</w>", "28": "of</w>", "29": "Wyoming</w>", "30": "&</w>", "31": "Operations</w>", "32": "Research</w>", "33": "Montreal</w>", "34": "4</w>", "35": "Mechanical</w>", "36": "Aerospace</w>", "37": "Engineering</w>", "38": "Abstract</w>", "39": "Many</w>", "40": "trained</w>", "41": "on</w>", "42": "natural</w>", "43": "images</w>", "44": "exhibit</w>", "45": "a</w>", "46": "curious</w>", "47": "phenomenon</w>", "48": "common</w>", "49": ":</w>", "50": "the</w>", "51": "first</w>", "52": "layer</w>", "53": "they</w>", "54": "learn</w>", "55": "similar</w>", "56": "to</w>", "57": "Gabor</w>", "58": "filters</w>", "59": "color</w>", "60": "blobs</w>", "61": "Such</w>", "62": "-</w>", "63": "appear</w>", "64": "not</w>", "65": "be</w>", "66": "specific</w>", "67": "particular</w>", "68": "dataset</w>", "69": "or</w>", "70": "task</w>", "71": "but</w>", "72": "general</w>", "73": "that</w>", "74": "applicable</w>", "75": "many</w>", "76": "datasets</w>", "77": "tasks</w>", "78": "Features</w>", "79": "must</w>", "80": "eventually</w>", "81": "transition</w>", "82": "from</w>", "83": "by</w>", "84": "last</w>", "85": "network</w>", "86": "this</w>", "87": "has</w>", "88": "been</w>", "89": "studied</w>", "90": "extensively</w>", "91": "In</w>", "92": "paper</w>", "93": "we</w>", "94": "experimentally</w>", "95": "quantify</w>", "96": "generality</w>", "97": "versus</w>", "98": "specificity</w>", "99": "neurons</w>", "100": "each</w>", "101": "convolutional</w>", "102": "report</w>", "103": "few</w>", "104": "surprising</w>", "105": "results</w>", "106": "Transferability</w>", "107": "is</w>", "108": "negatively</w>", "109": "affected</w>", "110": "two</w>", "111": "distinct</w>", "112": "issues</w>", "113": "(</w>", "114": ")</w>", "115": "specialization</w>", "116": "higher</w>", "117": "their</w>", "118": "original</w>", "119": "at</w>", "120": "expense</w>", "121": "performance</w>", "122": "target</w>", "123": "which</w>", "124": "was</w>", "125": "expected</w>", "126": "optimization</w>", "127": "difficulties</w>", "128": "related</w>", "129": "splitting</w>", "130": "between</w>", "131": "co</w>", "132": "adapted</w>", "133": "an</w>", "134": "example</w>", "135": "ImageNet</w>", "136": "demonstrate</w>", "137": "either</w>", "138": "these</w>", "139": "may</w>", "140": "dominate</w>", "141": "depending</w>", "142": "whether</w>", "143": "transferred</w>", "144": "bottom</w>", "145": "middle</w>", "146": "top</w>", "147": "We</w>", "148": "also</w>", "149": "document</w>", "150": "transferability</w>", "151": "decreases</w>", "152": "as</w>", "153": "distance</w>", "154": "base</w>", "155": "increases</w>", "156": "transferring</w>", "157": "even</w>", "158": "distant</w>", "159": "can</w>", "160": "better</w>", "161": "than</w>", "162": "using</w>", "163": "random</w>", "164": "A</w>", "165": "final</w>", "166": "result</w>", "167": "initializing</w>", "168": "with</w>", "169": "almost</w>", "170": "any</w>", "171": "number</w>", "172": "layers</w>", "173": "produce</w>", "174": "boost</w>", "175": "generalization</w>", "176": "lingers</w>", "177": "after</w>", "178": "fine</w>", "179": "tuning</w>", "180": "Introduction</w>", "181": "Modern</w>", "182": "when</w>", "183": "all</w>", "184": "tend</w>", "185": "resemble</w>", "186": "The</w>", "187": "appearance</w>", "188": "so</w>", "189": "obtaining</w>", "190": "anything</w>", "191": "else</w>", "192": "image</w>", "193": "causes</w>", "194": "suspicion</w>", "195": "poorly</w>", "196": "chosen</w>", "197": "hyperparameters</w>", "198": "software</w>", "199": "bug</w>", "200": "This</w>", "201": "occurs</w>", "202": "only</w>", "203": "for</w>", "204": "different</w>", "205": "very</w>", "206": "training</w>", "207": "objectives</w>", "208": "including</w>", "209": "supervised</w>", "210": "classification</w>", "211": "Krizhevsky</w>", "212": "et</w>", "213": "al</w>", "214": "2012</w>", "215": "unsupervised</w>", "216": "density</w>", "217": "learning</w>", "218": "Lee</w>", "219": "2009</w>", "220": "sparse</w>", "221": "representations</w>", "222": "Le</w>", "223": "2011</w>", "224": "Because</w>", "225": "finding</w>", "226": "standard</w>", "227": "seems</w>", "228": "occur</w>", "229": "regardless</w>", "230": "exact</w>", "231": "cost</w>", "232": "function</w>", "233": "call</w>", "234": "On</w>", "235": "other</w>", "236": "hand</w>", "237": "know</w>", "238": "computed</w>", "239": "depend</w>", "240": "greatly</w>", "241": "For</w>", "242": "N</w>", "243": "dimensional</w>", "244": "softmax</w>", "245": "output</w>", "246": "successfully</w>", "247": "toward</w>", "248": "objective</w>", "249": "unit</w>", "250": "will</w>", "251": "class</w>", "252": "thus</w>", "253": "These</w>", "254": "intuitive</w>", "255": "notions</w>", "256": "provide</w>", "257": "more</w>", "258": "rigorous</w>", "259": "definitions</w>", "260": "below</w>", "261": "If</w>", "262": "arXiv</w>", "263": "1411</w>", "264": "1792v1</w>", "265": "[</w>", "266": "cs</w>", "267": "LG</w>", "268": "]</w>", "269": "6</w>", "270": "Nov</w>", "271": "2014</w>", "272": "then</w>", "273": "there</w>", "274": "somewhere</w>", "275": "observation</w>", "276": "raises</w>", "277": "questions</w>", "278": "\u2022</w>", "279": "Can</w>", "280": "degree</w>", "281": "Does</w>", "282": "suddenly</w>", "283": "single</w>", "284": "it</w>", "285": "spread</w>", "286": "out</w>", "287": "over</w>", "288": "several</w>", "289": "Where</w>", "290": "does</w>", "291": "take</w>", "292": "place</w>", "293": "near</w>", "294": "interested</w>", "295": "answers</w>", "296": "because</w>", "297": "extent</w>", "298": "within</w>", "299": "able</w>", "300": "use</w>", "301": "them</w>", "302": "transfer</w>", "303": "Caruana</w>", "304": "1995</w>", "305": ";</w>", "306": "train</w>", "307": "repurpose</w>", "308": "learned</w>", "309": "second</w>", "310": "process</w>", "311": "work</w>", "312": "if</w>", "313": "meaning</w>", "314": "suitable</w>", "315": "both</w>", "316": "instead</w>", "317": "When</w>", "318": "significantly</w>", "319": "smaller</w>", "320": "powerful</w>", "321": "tool</w>", "322": "enable</w>", "323": "large</w>", "324": "without</w>", "325": "overfitting</w>", "326": "Recent</w>", "327": "studies</w>", "328": "have</w>", "329": "taken</w>", "330": "advantage</w>", "331": "fact</w>", "332": "obtain</w>", "333": "state</w>", "334": "art</w>", "335": "Donahue</w>", "336": "2013a</w>", "337": "Zeiler</w>", "338": "Fergus</w>", "339": "2013</w>", "340": "Sermanet</w>", "341": "collectively</w>", "342": "suggesting</w>", "343": "do</w>", "344": "indeed</w>", "345": "compute</w>", "346": "fairly</w>", "347": "further</w>", "348": "emphasize</w>", "349": "importance</w>", "350": "studying</w>", "351": "nature</w>", "352": "usual</w>", "353": "approach</w>", "354": "copy</w>", "355": "its</w>", "356": "n</w>", "357": "remaining</w>", "358": "randomly</w>", "359": "initialized</w>", "360": "One</w>", "361": "choose</w>", "362": "backpropagate</w>", "363": "errors</w>", "364": "new</w>", "365": "into</w>", "366": "copied</w>", "367": "tune</w>", "368": "feature</w>", "369": "left</w>", "370": "frozen</w>", "371": "change</w>", "372": "during</w>", "373": "choice</w>", "374": "depends</w>", "375": "size</w>", "376": "parameters</w>", "377": "small</w>", "378": "often</w>", "379": "problem</w>", "380": "tuned</w>", "381": "improve</w>", "382": "Of</w>", "383": "course</w>", "384": "would</w>", "385": "little</w>", "386": "need</w>", "387": "lower</w>", "388": "level</w>", "389": "could</w>", "390": "just</w>", "391": "scratch</w>", "392": "compare</w>", "393": "techniques</w>", "394": "\u2014</w>", "395": "following</w>", "396": "sections</w>", "397": "make</w>", "398": "contributions</w>", "399": "define</w>", "400": "way</w>", "401": "namely</w>", "402": "how</w>", "403": "well</w>", "404": "one</w>", "405": "another</w>", "406": "Section</w>", "407": "pairs</w>", "408": "characterize</w>", "409": "yields</w>", "410": "four</w>", "411": "show</w>", "412": "separate</w>", "413": "cause</w>", "414": "degradation</w>", "415": "i</w>", "416": "themselves</w>", "417": "ii</w>", "418": "due</w>", "419": "neighboring</w>", "420": "effects</w>", "421": "benefits</w>", "422": "dissimilar</w>", "423": "relatively</w>", "424": "find</w>", "425": "previously</w>", "426": "reported</w>", "427": "Jarrett</w>", "428": "weights</w>", "429": "vs</w>", "430": "perform</w>", "431": "5</w>", "432": "Finally</w>", "433": "particularly</w>", "434": "effect</w>", "435": "having</w>", "436": "seen</w>", "437": "persists</w>", "438": "extensive</w>", "439": "Generality</w>", "440": "Specificity</w>", "441": "Measured</w>", "442": "Transfer</w>", "443": "Performance</w>", "444": "noted</w>", "445": "tendency</w>", "446": "up</w>", "447": "study</w>", "448": "set</w>", "449": "used</w>", "450": "B</w>", "451": "It</w>", "452": "important</w>", "453": "note</w>", "454": "definition</w>", "455": "similarity</w>", "456": "create</w>", "457": "constructing</w>", "458": "non</w>", "459": "overlapping</w>", "460": "subsets</w>", "461": "To</w>", "462": "split</w>", "463": "1000</w>", "464": "classes</w>", "465": "groups</w>", "466": "containing</w>", "467": "500</w>", "468": "approximately</w>", "469": "half</w>", "470": "data</w>", "471": "about</w>", "472": "645</w>", "473": "000</w>", "474": "examples</w>", "475": "eight</w>", "476": "baseA</w>", "477": "baseB</w>", "478": "shown</w>", "479": "rows</w>", "480": "Figure</w>", "481": "{</w>", "482": "7</w>", "483": "}</w>", "484": "explanation</w>", "485": "=</w>", "486": "First</w>", "487": "selffer</w>", "488": "B3B</w>", "489": "five</w>", "490": "\u2013</w>", "491": "8</w>", "492": "control</w>", "493": "next</w>", "494": "row</w>", "495": "A3B</w>", "496": "Intuitively</w>", "497": "here</w>", "498": "classify</w>", "499": "performs</w>", "500": "evidence</w>", "501": "third</w>", "502": "least</w>", "503": "respect</w>", "504": "suffers</w>", "505": "repeated</w>", "506": "directions</w>", "507": "e</w>", "508": "AnB</w>", "509": "BnA</w>", "510": "above</w>", "511": "versions</w>", "512": "where</w>", "513": "+</w>", "514": "like</w>", "515": "assign</w>", "516": "contains</w>", "517": "clusters</w>", "518": "dogs</w>", "519": "cats</w>", "520": "13</w>", "521": "biological</w>", "522": "family</w>", "523": "Felidae</w>", "524": "tabby</w>", "525": "cat</w>", "526": "tiger</w>", "527": "Persian</w>", "528": "Siamese</w>", "529": "Egyptian</w>", "530": "mountain</w>", "531": "lion</w>", "532": "lynx</w>", "533": "leopard</w>", "534": "snow</w>", "535": "jaguar</w>", "536": "cheetah</w>", "537": "average</w>", "538": "contain</w>", "539": "felid</w>", "540": "levels</w>", "541": "help</w>", "542": "some</w>", "543": "types</w>", "544": "felids</w>", "545": "generalizing</w>", "546": "expect</w>", "547": "high</w>", "548": "detectors</w>", "549": "old</w>", "550": "low</w>", "551": "Thus</w>", "552": "created</w>", "553": "assigning</w>", "554": "less</w>", "555": "Fortunately</w>", "556": "provided</w>", "557": "hierarchy</w>", "558": "parent</w>", "559": "information</w>", "560": "allowed</w>", "561": "us</w>", "562": "special</w>", "563": "halves</w>", "564": "semantically</w>", "565": "possible</w>", "566": "man</w>", "567": "made</w>", "568": "entities</w>", "569": "quite</w>", "570": "551</w>", "571": "group</w>", "572": "449</w>", "573": "Further</w>", "574": "details</w>", "575": "given</w>", "576": "supplementary</w>", "577": "material</w>", "578": "1The</w>", "579": "released</w>", "580": "Large</w>", "581": "Scale</w>", "582": "Visual</w>", "583": "Recognition</w>", "584": "Challenge</w>", "585": "ILSVRC2012</w>", "586": "Deng</w>", "587": "281</w>", "588": "167</w>", "589": "labeled</w>", "590": "50</w>", "591": "test</w>", "592": "2Note</w>", "593": "doesn</w>", "594": "\u2019</w>", "595": "t</w>", "596": "sense</w>", "597": "case</w>", "598": "B8B</w>", "599": "A8B</w>", "600": "never</w>", "601": "input</w>", "602": "labels</w>", "603": "WA1</w>", "604": "WA2</w>", "605": "WA3</w>", "606": "WA4</w>", "607": "WA5</w>", "608": "WA6</w>", "609": "WA7</w>", "610": "WA8</w>", "611": "WB1</w>", "612": "WB2</w>", "613": "WB3</w>", "614": "WB4</w>", "615": "WB5</w>", "616": "WB6</w>", "617": "WB7</w>", "618": "WB8</w>", "619": "Overview</w>", "620": "experimental</w>", "621": "treatments</w>", "622": "controls</w>", "623": "Top</w>", "624": "backprop</w>", "625": "rectangles</w>", "626": "g</w>", "627": "represent</w>", "628": "weight</w>", "629": "vector</w>", "630": "indicating</w>", "631": "originally</w>", "632": "vertical</w>", "633": "ellipsoidal</w>", "634": "bars</w>", "635": "vectors</w>", "636": "activations</w>", "637": "Third</w>", "638": "upper</w>", "639": "\u2212</w>", "640": "entire</w>", "641": "same</w>", "642": "locked</w>", "643": "\u201c</w>", "644": "\u201d</w>", "645": "treatment</w>", "646": "reveals</w>", "647": "occurrence</w>", "648": "fragile</w>", "649": "coadaptation</w>", "650": "adapt</w>", "651": "such</w>", "652": "cannot</w>", "653": "rediscovered</w>", "654": "Fourth</w>", "655": "except</w>", "656": "tests</w>", "657": "Experimental</w>", "658": "Setup</w>", "659": "Since</w>", "660": "won</w>", "661": "competition</w>", "662": "much</w>", "663": "interest</w>", "664": "tweaking</w>", "665": "models</w>", "666": "However</w>", "667": "aim</w>", "668": "maximize</w>", "669": "absolute</w>", "670": "rather</w>", "671": "known</w>", "672": "architecture</w>", "673": "reference</w>", "674": "implementation</w>", "675": "Caffe</w>", "676": "Jia</w>", "677": "our</w>", "678": "comparable</w>", "679": "extensible</w>", "680": "useful</w>", "681": "researchers</w>", "682": "setup</w>", "683": "rates</w>", "684": "etc</w>", "685": "code</w>", "686": "parameter</w>", "687": "files</w>", "688": "reproduce</w>", "689": "experiments</w>", "690": "available</w>", "691": "http</w>", "692": "/</w>", "693": "yosinski</w>", "694": "com</w>", "695": "Results</w>", "696": "Discussion</w>", "697": "performed</w>", "698": "three</w>", "699": "sets</w>", "700": "main</w>", "701": "experiment</w>", "702": "splits</w>", "703": "discussed</w>", "704": "presents</w>", "705": "describes</w>", "706": "0</w>", "707": "52</w>", "708": "54</w>", "709": "56</w>", "710": "58</w>", "711": "60</w>", "712": "62</w>", "713": "64</w>", "714": "66</w>", "715": "accuracy</w>", "716": "BnB</w>", "717": "Layer</w>", "718": "chopped</w>", "719": "retrained</w>", "720": "improves</w>", "721": "Fine</w>", "722": "recovers</w>", "723": "interactions</w>", "724": "drops</w>", "725": "adaptation</w>", "726": "representation</w>", "727": "s</w>", "728": "Each</w>", "729": "marker</w>", "730": "figure</w>", "731": "represents</w>", "732": "validation</w>", "733": "white</w>", "734": "circles</w>", "735": "There</w>", "736": "points</w>", "737": "tested</w>", "738": "dark</w>", "739": "blue</w>", "740": "dot</w>", "741": "Light</w>", "742": "Dark</w>", "743": "red</w>", "744": "diamonds</w>", "745": "light</w>", "746": "Points</w>", "747": "shifted</w>", "748": "slightly</w>", "749": "right</w>", "750": "visual</w>", "751": "clarity</w>", "752": "Bottom</w>", "753": "Lines</w>", "754": "connecting</w>", "755": "means</w>", "756": "Numbered</w>", "757": "descriptions</w>", "758": "line</w>", "759": "refer</w>", "760": "interpretation</w>", "761": "applies</w>", "762": "Similar</w>", "763": "Datasets</w>", "764": "Random</w>", "765": "shown3</w>", "766": "yield</w>", "767": "conclusions</w>", "768": "interpretations</w>", "769": "dotted</w>", "770": "3AnA</w>", "771": "statistically</w>", "772": "equivalent</w>", "773": "cases</w>", "774": "simplify</w>", "775": "notation</w>", "776": "label</w>", "777": "Similarly</w>", "778": "aggregated</w>", "779": "identical</w>", "780": "subset</w>", "781": "attains</w>", "782": "625</w>", "783": "37</w>", "784": "%</w>", "785": "error</w>", "786": "42</w>", "787": "attained</w>", "788": "While</w>", "789": "might</w>", "790": "lead</w>", "791": "net</w>", "792": "ways</w>", "793": "mistakes</w>", "794": "behavior</w>", "795": "As</w>", "796": "That</w>", "797": "save</w>", "798": "reinitialize</w>", "799": "whole</w>", "800": "retrain</w>", "801": "holds</w>", "802": "true</w>", "803": "worse</w>", "804": "drop</w>", "805": "contained</w>", "806": "successive</w>", "807": "interact</w>", "808": "complex</w>", "809": "relearned</w>", "810": "alone</w>", "811": "Gradient</w>", "812": "descent</w>", "813": "good</w>", "814": "solution</w>", "815": "time</w>", "816": "were</w>", "817": "jointly</w>", "818": "By</w>", "819": "nearly</w>", "820": "back</w>", "821": "get</w>", "822": "closer</w>", "823": "relearn</w>", "824": "apparently</w>", "825": "relearning</w>", "826": "simple</w>", "827": "enough</w>", "828": "gradient</w>", "829": "Alternately</w>", "830": "say</w>", "831": "previous</w>", "832": "knowledge</w>", "833": "observed</w>", "834": "literature</w>", "835": "prevents</w>", "836": "measure</w>", "837": "Layers</w>", "838": "perfectly</w>", "839": "giving</w>", "840": "blob</w>", "841": "shows</w>", "842": "slight</w>", "843": "significant</w>", "844": "Thanks</w>", "845": "tell</w>", "846": "combination</w>", "847": "lost</w>", "848": "dominates</w>", "849": "whereas</w>", "850": "diminishes</w>", "851": "Although</w>", "852": "successful</w>", "853": "elsewhere</w>", "854": "Girshick</w>", "855": "2013b</w>", "856": "limited</w>", "857": "noticing</w>", "858": "alternative</w>", "859": "strictly</w>", "860": "believe</w>", "861": "carefully</w>", "862": "quantified</w>", "863": "decoupled</w>", "864": "showing</w>", "865": "part</w>", "866": "regime</w>", "867": "generalize</w>", "868": "those</w>", "869": "directly</w>", "870": "Previously</w>", "871": "reason</w>", "872": "want</w>", "873": "suggests</w>", "874": "Note</w>", "875": "should</w>", "876": "attributed</w>", "877": "longer</w>", "878": "total</w>", "879": "450k</w>", "880": "iterations</w>", "881": "finetuned</w>", "882": "length</w>", "883": "improvement</w>", "884": "plausible</w>", "885": "beginning</w>", "886": "completely</w>", "887": "still</w>", "888": "linger</w>", "889": "boosting</w>", "890": "through</w>", "891": "retraining</w>", "892": "keep</w>", "893": "initialize</w>", "894": "keeping</w>", "895": "anywhere</w>", "896": "seven</w>", "897": "produces</w>", "898": "improved</w>", "899": "across</w>", "900": "Table</w>", "901": "4We</w>", "902": "aggregate</w>", "903": "point</w>", "904": "computationally</w>", "905": "expensive</w>", "906": "9</w>", "907": "days</w>", "908": "GPU</w>", "909": "publication</w>", "910": "per</w>", "911": "aggregation</w>", "912": "averaged</w>", "913": "ranges</w>", "914": "mean</w>", "915": "Dissimilar</w>", "916": "Splitting</w>", "917": "Man</w>", "918": "Natural</w>", "919": "Classes</w>", "920": "Into</w>", "921": "Separate</w>", "922": "mentioned</w>", "923": "effectiveness</w>", "924": "decline</w>", "925": "become</w>", "926": "hypothesis</w>", "927": "comparing</w>", "928": "object</w>", "929": "creates</w>", "930": "subplot</w>", "931": "orange</w>", "932": "hexagons</w>", "933": "join</w>", "934": "lines</w>", "935": "categories</w>", "936": "simply</w>", "937": "being</w>", "938": "easier</w>", "939": "Weights</w>", "940": "untrained</w>", "941": "showed</w>", "942": "strikingly</w>", "943": "rectification</w>", "944": "pooling</w>", "945": "local</w>", "946": "normalization</w>", "947": "They</w>", "948": "Caltech</w>", "949": "101</w>", "950": "Fei</w>", "951": "2004</w>", "952": "ask</w>", "953": "optimal</w>", "954": "carries</w>", "955": "deeper</w>", "956": "larger</w>", "957": "obtained</w>", "958": "various</w>", "959": "choices</w>", "960": "falls</w>", "961": "off</w>", "962": "quickly</w>", "963": "chance</w>", "964": "getting</w>", "965": "straightforward</w>", "966": "comparison</w>", "967": "Whereas</w>", "968": "max</w>", "969": "did</w>", "970": "nonlinearity</w>", "971": "relu</w>", "972": "x</w>", "973": "abs</w>", "974": "tanh</w>", "975": "sizes</w>", "976": "differences</w>", "977": "Additionally</w>", "978": "considered</w>", "979": "hyperparameter</w>", "980": "architectural</w>", "981": "datapoint</w>", "982": "tweak</w>", "983": "initialization</w>", "984": "subtracting</w>", "985": "individual</w>", "986": "normalized</w>", "987": "performances</w>", "988": "plotted</w>", "989": "makes</w>", "990": "things</w>", "991": "apparent</w>", "992": "gap</w>", "993": "grows</w>", "994": "25</w>", "995": "Second</w>", "996": "latter</w>", "997": "differ</w>", "998": "fully</w>", "999": "ours</w>", "1000": "informative</w>", "1001": "however</w>", "1002": "based</w>", "1003": "draws</w>", "1004": "multiple</w>", "1005": "runs</w>", "1006": "loss</w>", "1007": "failed</w>", "1008": "converge</w>", "1009": "producing</w>", "1010": "Much</w>", "1011": "convergence</w>", "1012": "30</w>", "1013": "20</w>", "1014": "15</w>", "1015": "10</w>", "1016": "05</w>", "1017": "00</w>", "1018": "Relative</w>", "1019": "m</w>", "1020": "Degradation</w>", "1021": "vice</w>", "1022": "versa</w>", "1023": "connects</w>", "1024": "consist</w>", "1025": "plots</w>", "1026": "compared</w>", "1027": "making</w>", "1028": "extra</w>", "1029": "overfit</w>", "1030": "Conclusions</w>", "1031": "demonstrated</w>", "1032": "method</w>", "1033": "quantifying</w>", "1034": "fragilely</w>", "1035": "found</w>", "1036": "substantial</w>", "1037": "generally</w>", "1038": "technique</w>", "1039": "improving</w>", "1040": "Prototypical</w>", "1041": "Networks</w>", "1042": "Few</w>", "1043": "shot</w>", "1044": "Learning</w>", "1045": "Jake</w>", "1046": "Snell</w>", "1047": "Toronto</w>", "1048": "\u2217</w>", "1049": "Kevin</w>", "1050": "Swersky</w>", "1051": "Twitter</w>", "1052": "Richard</w>", "1053": "S</w>", "1054": "Zemel</w>", "1055": "Vector</w>", "1056": "Institute</w>", "1057": "propose</w>", "1058": "prototypical</w>", "1059": "classifier</w>", "1060": "metric</w>", "1061": "space</w>", "1062": "computing</w>", "1063": "distances</w>", "1064": "prototype</w>", "1065": "Compared</w>", "1066": "recent</w>", "1067": "approaches</w>", "1068": "reflect</w>", "1069": "simpler</w>", "1070": "inductive</w>", "1071": "bias</w>", "1072": "beneficial</w>", "1073": "achieve</w>", "1074": "excellent</w>", "1075": "analysis</w>", "1076": "design</w>", "1077": "decisions</w>", "1078": "improvements</w>", "1079": "involving</w>", "1080": "complicated</w>", "1081": "meta</w>", "1082": "extend</w>", "1083": "zero</w>", "1084": "theart</w>", "1085": "CU</w>", "1086": "Birds</w>", "1087": "16</w>", "1088": "accommodate</w>", "1089": "naive</w>", "1090": "re</w>", "1091": "model</w>", "1092": "severely</w>", "1093": "difficult</w>", "1094": "humans</w>", "1095": "ability</w>", "1096": "Two</w>", "1097": "progress</w>", "1098": "Vinyals</w>", "1099": "29</w>", "1100": "proposed</w>", "1101": "matching</w>", "1102": "uses</w>", "1103": "attention</w>", "1104": "mechanism</w>", "1105": "embedding</w>", "1106": "support</w>", "1107": "predict</w>", "1108": "unlabeled</w>", "1109": "query</w>", "1110": "Matching</w>", "1111": "interpreted</w>", "1112": "weighted</w>", "1113": "nearest</w>", "1114": "neighbor</w>", "1115": "applied</w>", "1116": "Notably</w>", "1117": "utilizes</w>", "1118": "sampled</w>", "1119": "mini</w>", "1120": "batches</w>", "1121": "called</w>", "1122": "episodes</w>", "1123": "episode</w>", "1124": "designed</w>", "1125": "mimic</w>", "1126": "subsampling</w>", "1127": "faithful</w>", "1128": "environment</w>", "1129": "thereby</w>", "1130": "Ravi</w>", "1131": "Larochelle</w>", "1132": "22</w>", "1133": "episodic</w>", "1134": "idea</w>", "1135": "Their</w>", "1136": "involves</w>", "1137": "LSTM</w>", "1138": "updates</w>", "1139": "Here</w>", "1140": "learner</w>", "1141": "learns</w>", "1142": "custom</w>", "1143": "attack</w>", "1144": "addressing</w>", "1145": "key</w>", "1146": "issue</w>", "1147": "under</w>", "1148": "assumption</w>", "1149": "Our</w>", "1150": "exists</w>", "1151": "cluster</w>", "1152": "around</w>", "1153": "order</w>", "1154": "linear</w>", "1155": "mapping</w>", "1156": "Classification</w>", "1157": "embedded</w>", "1158": "follow</w>", "1159": "tackle</w>", "1160": "comes</w>", "1161": "description</w>", "1162": "therefore</w>", "1163": "shared</w>", "1164": "serve</w>", "1165": "*</w>", "1166": "Initial</w>", "1167": "author</w>", "1168": "done</w>", "1169": "while</w>", "1170": "1703</w>", "1171": "05175v2</w>", "1172": "19</w>", "1173": "Jun</w>", "1174": "2017</w>", "1175": "c1</w>", "1176": "c2</w>", "1177": "c3</w>", "1178": "v1</w>", "1179": "v2</w>", "1180": "v3</w>", "1181": "b</w>", "1182": "Zero</w>", "1183": "scenarios</w>", "1184": "Left</w>", "1185": "prototypes</w>", "1186": "ck</w>", "1187": "Right</w>", "1188": "produced</w>", "1189": "vk</w>", "1190": "classified</w>", "1191": "via</w>", "1192": "p\u03c6</w>", "1193": "y</w>", "1194": "k</w>", "1195": "|</w>", "1196": "\u221d</w>", "1197": "exp</w>", "1198": "d</w>", "1199": "f\u03c6</w>", "1200": "scenario</w>", "1201": "formulate</w>", "1202": "settings</w>", "1203": "draw</w>", "1204": "connections</w>", "1205": "setting</w>", "1206": "analyze</w>", "1207": "underlying</w>", "1208": "relate</w>", "1209": "clustering</w>", "1210": "justify</w>", "1211": "Bregman</w>", "1212": "divergence</w>", "1213": "squared</w>", "1214": "Euclidean</w>", "1215": "empirically</w>", "1216": "vital</w>", "1217": "outperforms</w>", "1218": "commonly</w>", "1219": "cosine</w>", "1220": "benchmark</w>", "1221": "efficient</w>", "1222": "algorithms</w>", "1223": "appealing</w>", "1224": "Notation</w>", "1225": "x1</w>", "1226": "y1</w>", "1227": "xN</w>", "1228": "yN</w>", "1229": "xi</w>", "1230": "\u2208</w>", "1231": "R</w>", "1232": "D</w>", "1233": "yi</w>", "1234": "K</w>", "1235": "corresponding</w>", "1236": "Sk</w>", "1237": "denotes</w>", "1238": "Model</w>", "1239": "M</w>", "1240": "RM</w>", "1241": "\u2192</w>", "1242": "learnable</w>", "1243": "\u03c6</w>", "1244": "belonging</w>", "1245": "X</w>", "1246": "Given</w>", "1247": "\u00d7</w>", "1248": "\u221e</w>", "1249": "distribution</w>", "1250": "P</w>", "1251": "k0</w>", "1252": "ck0</w>", "1253": "proceeds</w>", "1254": "minimizing</w>", "1255": "negative</w>", "1256": "log</w>", "1257": "probability</w>", "1258": "J</w>", "1259": "SGD</w>", "1260": "Training</w>", "1261": "formed</w>", "1262": "selecting</w>", "1263": "choosing</w>", "1264": "act</w>", "1265": "remainder</w>", "1266": "Pseudocode</w>", "1267": "Algorithm</w>", "1268": "computation</w>", "1269": "NC</w>", "1270": "\u2264</w>", "1271": "NS</w>", "1272": "NQ</w>", "1273": "RANDOMSAMPLE</w>", "1274": "elements</w>", "1275": "uniformly</w>", "1276": "replacement</w>", "1277": "Input</w>", "1278": "Dk</w>", "1279": "Output</w>", "1280": "generated</w>", "1281": "V</w>", "1282": "\u2190</w>", "1283": "Select</w>", "1284": "indices</w>", "1285": "DVk</w>", "1286": "Qk</w>", "1287": "\\</w>", "1288": "Compute</w>", "1289": "end</w>", "1290": "Initialize</w>", "1291": "\"</w>", "1292": "logX</w>", "1293": "#</w>", "1294": "Update</w>", "1295": "Mixture</w>", "1296": "Density</w>", "1297": "Estimation</w>", "1298": "functions</w>", "1299": "regular</w>", "1300": "divergences</w>", "1301": "algorithm</w>", "1302": "performing</w>", "1303": "mixture</w>", "1304": "estimation</w>", "1305": "exponential</w>", "1306": "d\u03d5</w>", "1307": "defined</w>", "1308": "\u03d5</w>", "1309": "differentiable</w>", "1310": "convex</w>", "1311": "Legendre</w>", "1312": "type</w>", "1313": "Examples</w>", "1314": "include</w>", "1315": "kz</w>", "1316": "z</w>", "1317": "0k</w>", "1318": "Mahalanobis</w>", "1319": "Prototype</w>", "1320": "viewed</w>", "1321": "terms</w>", "1322": "hard</w>", "1323": "assigned</w>", "1324": "representative</w>", "1325": "achieving</w>", "1326": "minimal</w>", "1327": "Equation</w>", "1328": "representatives</w>", "1329": "Moreover</w>", "1330": "p\u03c8</w>", "1331": "\u03b8</w>", "1332": "cumulant</w>", "1333": "\u03c8</w>", "1334": "written</w>", "1335": "uniquely</w>", "1336": "determined</w>", "1337": "equally</w>", "1338": "assignment</w>", "1339": "inference</w>", "1340": "prediction</w>", "1341": "\u00b5</w>", "1342": "\u03b8k</w>", "1343": "effectively</w>", "1344": "specifies</w>", "1345": "modeling</w>", "1346": "assumptions</w>", "1347": "classconditional</w>", "1348": "Reinterpretation</w>", "1349": "Linear</w>", "1350": "gaining</w>", "1351": "insight</w>", "1352": "parameterization</w>", "1353": "term</w>", "1354": "constant</w>", "1355": "affect</w>", "1356": "probabilities</w>", "1357": "focus</w>", "1358": "primarily</w>", "1359": "spherical</w>", "1360": "Gaussian</w>", "1361": "densities</w>", "1362": "indicate</w>", "1363": "effective</w>", "1364": "despite</w>", "1365": "equivalence</w>", "1366": "hypothesize</w>", "1367": "required</w>", "1368": "linearity</w>", "1369": "Indeed</w>", "1370": "modern</w>", "1371": "systems</w>", "1372": "currently</w>", "1373": "14</w>", "1374": "28</w>", "1375": "Comparison</w>", "1376": "xk</w>", "1377": "since</w>", "1378": "question</w>", "1379": "fixed</w>", "1380": "greater</w>", "1381": "require</w>", "1382": "partitioning</w>", "1383": "scheme</w>", "1384": "Mensink</w>", "1385": "Rippel</w>", "1386": "methods</w>", "1387": "phase</w>", "1388": "ordinary</w>", "1389": "extensions</w>", "1390": "decoupling</w>", "1391": "conditional</w>", "1392": "FCE</w>", "1393": "takes</w>", "1394": "account</w>", "1395": "likewise</w>", "1396": "incorporated</w>", "1397": "increase</w>", "1398": "imposes</w>", "1399": "arbitrary</w>", "1400": "ordering</w>", "1401": "bi</w>", "1402": "directional</w>", "1403": "Instead</w>", "1404": "outline</w>", "1405": "Design</w>", "1406": "Choices</w>", "1407": "Distance</w>", "1408": "apply</w>", "1409": "permissible</w>", "1410": "conjecture</w>", "1411": "hold</w>", "1412": "Episode</w>", "1413": "composition</w>", "1414": "construct</w>", "1415": "Nc</w>", "1416": "match</w>", "1417": "situation</w>", "1418": "comprised</w>", "1419": "extremely</w>", "1420": "held</w>", "1421": "Another</w>", "1422": "consideration</w>", "1423": "usually</w>", "1424": "best</w>", "1425": "Shot</w>", "1426": "differs</w>", "1427": "accuracies</w>", "1428": "Omniglot</w>", "1429": "Acc</w>", "1430": "Dist</w>", "1431": "Tune</w>", "1432": "MATCHING</w>", "1433": "NETWORKS</w>", "1434": "Cosine</w>", "1435": "98</w>", "1436": "93</w>", "1437": "Y</w>", "1438": "97</w>", "1439": "NEURAL</w>", "1440": "STATISTICIAN</w>", "1441": "99</w>", "1442": "PROTOTYPICAL</w>", "1443": "OURS</w>", "1444": "Euclid</w>", "1445": "96</w>", "1446": "advance</w>", "1447": "raw</w>", "1448": "text</w>", "1449": "Modifying</w>", "1450": "deal</w>", "1451": "g\u03d1</w>", "1452": "An</w>", "1453": "illustration</w>", "1454": "procedure</w>", "1455": "relates</w>", "1456": "come</w>", "1457": "domains</w>", "1458": "helpful</w>", "1459": "fix</w>", "1460": "constrain</w>", "1461": "f</w>", "1462": "Experiments</w>", "1463": "miniImageNet</w>", "1464": "version</w>", "1465": "ILSVRC</w>", "1466": "26</w>", "1467": "UCSD</w>", "1468": "bird</w>", "1469": "CUB</w>", "1470": "200</w>", "1471": "31</w>", "1472": "1623</w>", "1473": "handwritten</w>", "1474": "characters</w>", "1475": "collected</w>", "1476": "alphabets</w>", "1477": "associated</w>", "1478": "character</w>", "1479": "drawn</w>", "1480": "human</w>", "1481": "subject</w>", "1482": "resizing</w>", "1483": "grayscale</w>", "1484": "augmenting</w>", "1485": "rotations</w>", "1486": "multiples</w>", "1487": "90</w>", "1488": "degrees</w>", "1489": "1200</w>", "1490": "plus</w>", "1491": "800</w>", "1492": "mirrors</w>", "1493": "composed</w>", "1494": "blocks</w>", "1495": "block</w>", "1496": "comprises</w>", "1497": "filter</w>", "1498": "convolution</w>", "1499": "batch</w>", "1500": "ReLU</w>", "1501": "encoder</w>", "1502": "All</w>", "1503": "Adam</w>", "1504": "11</w>", "1505": "initial</w>", "1506": "rate</w>", "1507": "cut</w>", "1508": "every</w>", "1509": "2000</w>", "1510": "No</w>", "1511": "regularization</w>", "1512": "advantageous</w>", "1513": "fewer</w>", "1514": "against</w>", "1515": "baselines</w>", "1516": "statistician</w>", "1517": "derived</w>", "1518": "12</w>", "1519": "84</w>", "1520": "divided</w>", "1521": "100</w>", "1522": "600</w>", "1523": "introduced</w>", "1524": "monitoring</w>", "1525": "though</w>", "1526": "1600</w>", "1527": "increased</w>", "1528": "95</w>", "1529": "confidence</w>", "1530": "intervals</w>", "1531": "BASELINE</w>", "1532": "NEAREST</w>", "1533": "NEIGHBORS</w>", "1534": "86</w>", "1535": "\u00b1</w>", "1536": "49</w>", "1537": "79</w>", "1538": "43</w>", "1539": "40</w>", "1540": "78</w>", "1541": "51</w>", "1542": "09</w>", "1543": "71</w>", "1544": "55</w>", "1545": "73</w>", "1546": "META</w>", "1547": "LEARNER</w>", "1548": "44</w>", "1549": "77</w>", "1550": "68</w>", "1551": "70</w>", "1552": "80</w>", "1553": "Accuracy</w>", "1554": "Proto</w>", "1555": "Nets</w>", "1556": "axis</w>", "1557": "indicates</w>", "1558": "configuration</w>", "1559": "Error</w>", "1560": "schedule</w>", "1561": "until</w>", "1562": "stops</w>", "1563": "variants</w>", "1564": "Meta</w>", "1565": "Learner</w>", "1566": "achieves</w>", "1567": "wide</w>", "1568": "margin</w>", "1569": "conducted</w>", "1570": "determine</w>", "1571": "own</w>", "1572": "difficulty</w>", "1573": "helps</w>", "1574": "forces</w>", "1575": "grained</w>", "1576": "Also</w>", "1577": "substantially</w>", "1578": "pronounced</w>", "1579": "naturally</w>", "1580": "suited</w>", "1581": "assess</w>", "1582": "suitability</w>", "1583": "run</w>", "1584": "788</w>", "1585": "species</w>", "1586": "closely</w>", "1587": "Reed</w>", "1588": "23</w>", "1589": "preparing</w>", "1590": "Image</w>", "1591": "ALE</w>", "1592": "Fisher</w>", "1593": "SJE</w>", "1594": "AlexNet</w>", "1595": "SAMPLE</w>", "1596": "CLUSTERING</w>", "1597": "17</w>", "1598": "GoogLeNet</w>", "1599": "DS</w>", "1600": "DA</w>", "1601": "PROTO</w>", "1602": "NETS</w>", "1603": "divide</w>", "1604": "024</w>", "1605": "extracted</w>", "1606": "applying</w>", "1607": "crops</w>", "1608": "horizontally</w>", "1609": "flipped</w>", "1610": "image2</w>", "1611": "At</w>", "1612": "crop</w>", "1613": "312</w>", "1614": "continuous</w>", "1615": "attribute</w>", "1616": "attributes</w>", "1617": "encode</w>", "1618": "characteristics</w>", "1619": "shape</w>", "1620": "feather</w>", "1621": "patterns</w>", "1622": "1024</w>", "1623": "normalize</w>", "1624": "domain</w>", "1625": "constructed</w>", "1626": "embeddings</w>", "1627": "optimized</w>", "1628": "decay</w>", "1629": "Early</w>", "1630": "stopping</w>", "1631": "epochs</w>", "1632": "utilizing</w>", "1633": "trains</w>", "1634": "SVM</w>", "1635": "relative</w>", "1636": "Related</w>", "1637": "Work</w>", "1638": "vast</w>", "1639": "summarize</w>", "1640": "most</w>", "1641": "relevant</w>", "1642": "Neighborhood</w>", "1643": "Components</w>", "1644": "Analysis</w>", "1645": "NCA</w>", "1646": "KNN</w>", "1647": "leave</w>", "1648": "transformed</w>", "1649": "Salakhutdinov</w>", "1650": "Hinton</w>", "1651": "27</w>", "1652": "transformation</w>", "1653": "LMNN</w>", "1654": "attempts</w>", "1655": "optimize</w>", "1656": "hinge</w>", "1657": "encourages</w>", "1658": "neighborhood</w>", "1659": "DNet</w>", "1660": "21</w>", "1661": "upon</w>", "1662": "extension</w>", "1663": "opposed</w>", "1664": "distinction</w>", "1665": "form</w>", "1666": "allows</w>", "1667": "concise</w>", "1668": "independent</w>", "1669": "obviates</w>", "1670": "store</w>", "1671": "predictions</w>", "1672": "represented</w>", "1673": "developed</w>", "1674": "rapidly</w>", "1675": "incorporate</w>", "1676": "relies</w>", "1677": "handle</w>", "1678": "downloaded</w>", "1679": "https</w>", "1680": "github</w>", "1681": "reedscot</w>", "1682": "cvpr2016</w>", "1683": "novel</w>", "1684": "contrast</w>", "1685": "linearly</w>", "1686": "embed</w>", "1687": "couple</w>", "1688": "attempt</w>", "1689": "allowing</w>", "1690": "pre</w>", "1691": "processing</w>", "1692": "step</w>", "1693": "multi</w>", "1694": "modal</w>", "1695": "variant</w>", "1696": "manner</w>", "1697": "no</w>", "1698": "requires</w>", "1699": "addition</w>", "1700": "generalizes</w>", "1701": "dynamics</w>", "1702": "itself</w>", "1703": "goal</w>", "1704": "forms</w>", "1705": "classifiers</w>", "1706": "dynamically</w>", "1707": "core</w>", "1708": "rely</w>", "1709": "nets</w>", "1710": "secondary</w>", "1711": "amount</w>", "1712": "generative</w>", "1713": "extends</w>", "1714": "variational</w>", "1715": "autoencoder</w>", "1716": "24</w>", "1717": "component</w>", "1718": "statistic</w>", "1719": "summarizes</w>", "1720": "encoding</w>", "1721": "taking</w>", "1722": "sample</w>", "1723": "post</w>", "1724": "approximate</w>", "1725": "posterior</w>", "1726": "Edwards</w>", "1727": "Storkey</w>", "1728": "considering</w>", "1729": "whose</w>", "1730": "KL</w>", "1731": "inferred</w>", "1732": "Like</w>", "1733": "summary</w>", "1734": "discriminative</w>", "1735": "befits</w>", "1736": "With</w>", "1737": "resembles</w>", "1738": "multimodal</w>", "1739": "Unlike</w>", "1740": "empirical</w>", "1741": "risk</w>", "1742": "Neither</w>", "1743": "nor</w>", "1744": "speed</w>", "1745": "regularize</w>", "1746": "Conclusion</w>", "1747": "specifically</w>", "1748": "far</w>", "1749": "sophisticated</w>", "1750": "although</w>", "1751": "modifying</w>", "1752": "direction</w>", "1753": "future</w>", "1754": "utilize</w>", "1755": "distributions</w>", "1756": "beyond</w>", "1757": "Gaussians</w>", "1758": "preliminary</w>", "1759": "explorations</w>", "1760": "variance</w>", "1761": "dimension</w>", "1762": "gains</w>", "1763": "flexibility</w>", "1764": "requiring</w>", "1765": "additional</w>", "1766": "fitted</w>", "1767": "Overall</w>", "1768": "simplicity</w>", "1769": "promising</w>"}, "model_type": "bpe", "vocab_size": 30522}