% refs.bib

% SSL methods  
% context encoder, cpc, simclr, moco, byol, simsiam, vicreg, barlow twins
% Venue abbreviations, referenced unquoted from the entries below.
@string{ICCV    = {ICCV}}
@string{CVPR    = {CVPR}}
@string{NIPS    = {NeurIPS}}
@string{WACV    = {WACV}}
@string{ECCV    = {ECCV}}
@string{ICML    = {ICML}}
@string{ICLR    = {ICLR}}
@string{ICASSP  = {ICASSP}}
@string{AISTATS = {AISTATS}}
@string{IJCV    = {IJCV}}
@string{CIVR    = {CIVR}}

@article{chen2020unsupervised,
  author  = {Chen, Weijie and Pu, Shiliang and Xie, Di and Yang, Shicai and Guo, Yilu and Lin, Luojun},
  title   = {Unsupervised Image Classification for Deep Representation Learning},
  journal = {arXiv preprint arXiv:2006.11480},
  year    = {2020},
}

% NeurIPS 1993 (volume 6); typed as a conference paper like the other NeurIPS entries.
% The title uses TeX quotes around the protected word Siamese.
@inproceedings{bromley1993signature,
  author    = {Bromley, Jane and Guyon, Isabelle and LeCun, Yann and S{\"a}ckinger, Eduard and Shah, Roopak},
  title     = {Signature Verification Using a ``{Siamese}'' Time Delay Neural Network},
  booktitle = NIPS,
  volume    = {6},
  year      = {1993},
}

% Acronym braced so sentence-casing styles do not lowercase it.
@article{shen2021s2,
  author  = {Shen, Zhiqiang and Liu, Zechun and Qin, Jie and Huang, Lei and Cheng, Kwang-Ting and Savvides, Marios},
  title   = {{S2-BNN}: Bridging the Gap Between Self-Supervised Real and 1-bit Neural Networks via Guided Distribution Calibration},
  journal = {arXiv preprint arXiv:2102.08946},
  year    = {2021},
}

@inproceedings{gidaris2020learning,
  author    = {Gidaris, Spyros and Bursuc, Andrei and Komodakis, Nikos and P{\'e}rez, Patrick and Cord, Matthieu},
  title     = {Learning representations by predicting bags of visual words},
  booktitle = CVPR,
  year      = {2020},
}

% ICLR 2021 conference paper: the venue belongs in booktitle of an inproceedings entry.
@inproceedings{junnan2021prototypical,
  author    = {Li, Junnan and Zhou, Pan and Xiong, Caiming and Hoi, Steven C. H.},
  title     = {Prototypical Contrastive Learning of Unsupervised Representations},
  booktitle = ICLR,
  year      = {2021},
}

@inproceedings{noroozi2018boosting,
  author    = {Noroozi, Mehdi and Vinjimoor, Ananth and Favaro, Paolo and Pirsiavash, Hamed},
  title     = {Boosting self-supervised learning via knowledge transfer},
  booktitle = CVPR,
  year      = {2018},
}

% inproceedings requires booktitle; the venue was previously stored in an ignored journal field.
@inproceedings{fang2021seed,
  author    = {Fang, Zhiyuan and Wang, Jianfeng and Wang, Lijuan and Zhang, Lei and Yang, Yezhou and Liu, Zicheng},
  title     = {{SEED}: Self-supervised Distillation For Visual Representation},
  booktitle = ICLR,
  year      = {2021},
}

@article{xu2021seed,
  author  = {Xu, Haohang and Zhang, Xiaopeng and Li, Hao and Xie, Lingxi and Xiong, Hongkai and Tian, Qi},
  title   = {Seed the Views: Hierarchical Semantic Alignment for Contrastive Representation Learning},
  journal = {arXiv preprint arXiv:2012.02733},
  year    = {2021},
}

% Acronym braced so sentence-casing styles do not lowercase it.
@article{radenovic2018fine,
  author  = {Radenovi{\'c}, Filip and Tolias, Giorgos and Chum, Ond{\v{r}}ej},
  title   = {Fine-tuning {CNN} image retrieval with no human annotation},
  journal = {IEEE transactions on pattern analysis and machine intelligence},
  year    = {2018},
}

@article{gidaris2020obow,
  author  = {Gidaris, Spyros and Bursuc, Andrei and Puy, Gilles and Komodakis, Nikos and Cord, Matthieu and P{\'e}rez, Patrick},
  title   = {Online Bag-of-Visual-Words Generation for Unsupervised Representation Learning},
  journal = {arXiv preprint arXiv:2012.11552},
  year    = {2020},
}

% Only the proper noun is brace-protected; the third author uses the Last, First form.
@article{berman2019multigrain,
  author  = {Berman, Maxim and J{\'e}gou, Herv{\'e} and Vedaldi, Andrea and Kokkinos, Iasonas and Douze, Matthijs},
  title   = {{MultiGrain}: A Unified Image Embedding for Classes and Instances},
  journal = {arXiv preprint arXiv:1902.05509},
  year    = {2019},
}

% Dataset name braced so sentence-casing styles do not lowercase it.
@article{thomee2015yfcc100m,
  author  = {Thomee, Bart and Shamma, David A and Friedland, Gerald and Elizalde, Benjamin and Ni, Karl and Poland, Douglas and Borth, Damian and Li, Li-Jia},
  title   = {{YFCC100M}: The new data in multimedia research},
  journal = {arXiv preprint arXiv:1503.01817},
  year    = {2015},
}

@inproceedings{douze2009evaluation,
  author    = {Douze, Matthijs and J{\'e}gou, Herv{\'e} and Sandhawalia, Harsimrat and Amsaleg, Laurent and Schmid, Cordelia},
  title     = {Evaluation of gist descriptors for web-scale image search},
  booktitle = CIVR,
  year      = {2009},
}

@article{sariyildiz2020concept,
  author  = {Sariyildiz, Mert Bulent and Kalantidis, Yannis and Larlus, Diane and Alahari, Karteek},
  title   = {Concept generalization in visual representation learning},
  journal = {arXiv preprint arXiv:2012.05649},
  year    = {2020},
}

% Acronym braced so sentence-casing styles do not lowercase it.
@article{tolias2015particular,
  author  = {Tolias, Giorgos and Sicre, Ronan and J{\'e}gou, Herv{\'e}},
  title   = {Particular object retrieval with integral max-pooling of {CNN} activations},
  journal = {arXiv preprint arXiv:1511.05879},
  year    = {2015},
}

% The particle "de" is part of the surname (von Last, First form), not a given name.
@inproceedings{revaud2019learning,
  author    = {Revaud, Jerome and Almaz{\'a}n, Jon and Rezende, Rafael S and de Souza, Cesar Roberto},
  title     = {Learning with average precision: Training image retrieval with a listwise loss},
  booktitle = ICCV,
  year      = {2019},
}

% CVPR conference paper: booktitle is only honoured on an inproceedings entry.
@inproceedings{weyand2020google,
  author    = {Weyand, Tobias and Araujo, Andre and Cao, Bingyi and Sim, Jack},
  title     = {{Google} Landmarks Dataset v2 -- A Large-Scale Benchmark for Instance-Level Recognition and Retrieval},
  booktitle = CVPR,
  year      = {2020},
}

% Chum's given name is spelled with the caron, matching the other entries in this file.
@inproceedings{philbin2008lost,
  author    = {Philbin, James and Chum, Ond{\v{r}}ej and Isard, Michael and Sivic, Josef and Zisserman, Andrew},
  title     = {Lost in quantization: Improving particular object retrieval in large scale image databases},
  booktitle = CVPR,
  year      = {2008},
}

% CVPR conference paper: booktitle is only honoured on an inproceedings entry.
@inproceedings{radenovic2018revisiting,
  author    = {Radenovi{\'c}, Filip and Iscen, Ahmet and Tolias, Giorgos and Avrithis, Yannis and Chum, Ond{\v{r}}ej},
  title     = {Revisiting {Oxford} and {Paris}: Large-Scale Image Retrieval Benchmarking},
  booktitle = CVPR,
  year      = {2018},
}

% NeurIPS conference paper: booktitle is only honoured on an inproceedings entry.
% NOTE(review): third author's surname normalised to Benmalek -- confirm against the proceedings.
@inproceedings{pinheiro2020unsupervised,
  author    = {Pinheiro, Pedro O and Almahairi, Amjad and Benmalek, Ryan Y and Golemo, Florian and Courville, Aaron},
  title     = {Unsupervised Learning of Dense Visual Representations},
  booktitle = NIPS,
  year      = {2020},
}

% NeurIPS conference paper: booktitle is only honoured on an inproceedings entry.
@inproceedings{jabri2020space,
  author    = {Jabri, Allan and Owens, Andrew and Efros, Alexei A},
  title     = {Space-time correspondence as a contrastive random walk},
  booktitle = NIPS,
  year      = {2020},
}

@article{polyak1992acceleration,
  author    = {Polyak, Boris T and Juditsky, Anatoli B},
  title     = {Acceleration of stochastic approximation by averaging},
  journal   = {SIAM journal on control and optimization},
  publisher = {SIAM},
  volume    = {30},
  number    = {4},
  pages     = {838--855},
  year      = {1992},
}

@inproceedings{asano2019self,
  author    = {Asano, Yuki Markus and Rupprecht, Christian and Vedaldi, Andrea},
  title     = {Self-labelling via simultaneous clustering and representation learning},
  booktitle = ICLR,
  year      = {2020},
}

@inproceedings{caron2018deep,
  author    = {Caron, Mathilde and Bojanowski, Piotr and Joulin, Armand and Douze, Matthijs},
  title     = {Deep clustering for unsupervised learning of visual features},
  booktitle = ECCV,
  year      = {2018},
}

% Venue given through the shared ICCV macro so it renders like the other ICCV entries.
@inproceedings{wang2015unsupervised,
  author    = {Wang, Xiaolong and Gupta, Abhinav},
  title     = {Unsupervised learning of visual representations using videos},
  booktitle = ICCV,
  pages     = {2794--2802},
  year      = {2015},
}

@inproceedings{wu2018unsupervised,
  author    = {Wu, Zhirong and Xiong, Yuanjun and Yu, Stella X and Lin, Dahua},
  title     = {Unsupervised feature learning via non-parametric instance discrimination},
  booktitle = CVPR,
  year      = {2018},
}

@inproceedings{caron2019unsupervised,
  author    = {Caron, Mathilde and Bojanowski, Piotr and Mairal, Julien and Joulin, Armand},
  title     = {Unsupervised pre-training of image features on non-curated data},
  booktitle = ICCV,
  year      = {2019},
}

% Proper noun and acronym restored and braced against style downcasing.
@inproceedings{lin2014microsoft,
  author    = {Lin, Tsung-Yi and Maire, Michael and Belongie, Serge and Hays, James and Perona, Pietro and Ramanan, Deva and Doll{\'a}r, Piotr and Zitnick, C Lawrence},
  title     = {{Microsoft} {COCO}: Common objects in context},
  booktitle = ECCV,
  year      = {2014},
}

% Acronym restored and braced against style downcasing.
@inproceedings{he2017mask,
  author    = {He, Kaiming and Gkioxari, Georgia and Doll{\'a}r, Piotr and Girshick, Ross},
  title     = {Mask {R-CNN}},
  booktitle = ICCV,
  year      = {2017},
}

@article{you2017large,
  author  = {Yang You and Igor Gitman and Boris Ginsburg},
  title   = {Large Batch Training of Convolutional Networks},
  journal = {preprint arXiv:1708.03888},
  year    = {2017},
}

% Tool and language names braced so sentence-casing styles do not lowercase them.
@article{mairal2019cyanure,
  author  = {Mairal, Julien},
  title   = {{Cyanure}: An Open-Source Toolbox for Empirical Risk Minimization for {Python}, {C++}, and soon more},
  journal = {preprint arXiv:1912.08165},
  year    = {2019},
}

@inproceedings{touvron2019fixing,
  author    = {Touvron, Hugo and Vedaldi, Andrea and Douze, Matthijs and J{\'e}gou, Herv{\'e}},
  title     = {Fixing the train-test resolution discrepancy},
  booktitle = NIPS,
  year      = {2019},
}


@inproceedings{he2020momentum,
  author    = {He, Kaiming and Fan, Haoqi and Wu, Yuxin and Xie, Saining and Girshick, Ross},
  title     = {Momentum contrast for unsupervised visual representation learning},
  booktitle = CVPR,
  year      = {2020},
}

% "van der" is the surname particle (von Last, First form), matching mahajan2018exploring.
@inproceedings{misra2020self,
  author    = {Misra, Ishan and van der Maaten, Laurens},
  title     = {Self-supervised learning of pretext-invariant representations},
  booktitle = CVPR,
  year      = {2020},
}

@inproceedings{miech2020end,
  author    = {Miech, Antoine and Alayrac, Jean-Baptiste and Smaira, Lucas and Laptev, Ivan and Sivic, Josef and Zisserman, Andrew},
  title     = {End-to-end learning of visual representations from uncurated instructional videos},
  booktitle = CVPR,
  year      = {2020},
}

% Method name braced so sentence-casing styles keep its inner capital.
@inproceedings{yan2020clusterfit,
  author    = {Yan, Xueting and Misra, Ishan and Gupta, Abhinav and Ghadiyaram, Deepti and Mahajan, Dhruv},
  title     = {{ClusterFit}: Improving Generalization of Visual Representations},
  booktitle = CVPR,
  year      = {2020},
}

@inproceedings{mahajan2018exploring,
  author    = {Mahajan, Dhruv and Girshick, Ross and Ramanathan, Vignesh and He, Kaiming and Paluri, Manohar and Li, Yixuan and Bharambe, Ashwin and van der Maaten, Laurens},
  title     = {Exploring the limits of weakly supervised pretraining},
  booktitle = ECCV,
  year      = {2018},
}

% Lowercase "van der" so BibTeX parses it as a particle, matching mahajan2018exploring.
@inproceedings{joulin2016learning,
  author    = {Joulin, Armand and van der Maaten, Laurens and Jabri, Allan and Vasilache, Nicolas},
  title     = {Learning visual features from large weakly supervised data},
  booktitle = ECCV,
  year      = {2016},
}

@article{chen2020simple,
  author  = {Chen, Ting and Kornblith, Simon and Norouzi, Mohammad and Hinton, Geoffrey},
  title   = {A simple framework for contrastive learning of visual representations},
  journal = {preprint arXiv:2002.05709},
  year    = {2020},
}

% Accents are LaTeX-escaped, as elsewhere in this file, so classic 8-bit BibTeX handles them.
@inproceedings{grill2020bootstrap,
  author    = {Grill, Jean-Bastien and Strub, Florian and Altch{\'e}, Florent and Tallec, Corentin and Richemond, Pierre H and Buchatskaya, Elena and Doersch, Carl and Pires, Bernardo Avila and Guo, Zhaohan Daniel and Azar, Mohammad Gheshlaghi and Piot, Bilal and Kavukcuoglu, Koray and Munos, R{\'e}mi and Valko, Michal},
  title     = {Bootstrap your own latent: A new approach to self-supervised learning},
  booktitle = NIPS,
  year      = {2020},
}

@inproceedings{caron2020unsupervised,
  author    = {Caron, Mathilde and Misra, Ishan and Mairal, Julien and Goyal, Priya and Bojanowski, Piotr and Joulin, Armand},
  title     = {Unsupervised learning of visual features by contrasting cluster assignments},
  booktitle = NIPS,
  year      = {2020},
}

@inproceedings{oyallon2017scaling,
  author    = {Oyallon, Edouard and Belilovsky, Eugene and Zagoruyko, Sergey},
  title     = {Scaling the scattering transform: Deep hybrid networks},
  booktitle = ICCV,
  year      = {2017},
}

@inproceedings{bojanowski2017unsupervised,
  author    = {Bojanowski, Piotr and Joulin, Armand},
  title     = {Unsupervised learning by predicting Noise},
  booktitle = ICML,
  year      = {2017},
}

% Technical report rather than a journal article; institution and year are required fields.
% NOTE(review): institution and year taken from the public GPT-2 report -- confirm.
@techreport{radford2019language,
  author      = {Radford, Alec and Wu, Jeffrey and Child, Rewon and Luan, David and Amodei, Dario and Sutskever, Ilya},
  title       = {Language models are unsupervised multitask learners},
  institution = {OpenAI},
  year        = {2019},
}

% Acronym restored and braced against style downcasing.
@article{devlin2018bert,
  author  = {Devlin, Jacob and Chang, Ming-Wei and Lee, Kenton and Toutanova, Kristina},
  title   = {{BERT}: Pre-training of deep bidirectional transformers for language understanding},
  journal = {preprint arXiv:1810.04805},
  year    = {2018},
}

@article{schneider2019wav2vec,
  author  = {Schneider, Steffen and Baevski, Alexei and Collobert, Ronan and Auli, Michael},
  title   = {wav2vec: Unsupervised pre-training for speech recognition},
  journal = {preprint arXiv:1904.05862},
  year    = {2019},
}

@inproceedings{riviere2020unsupervised,
  author    = {Rivi{\`e}re, Morgane and Joulin, Armand and Mazar{\'e}, Pierre-Emmanuel and Dupoux, Emmanuel},
  title     = {Unsupervised pretraining transfers well across languages},
  booktitle = ICASSP,
  year      = {2020},
}

@inproceedings{baevski2020wav2vec,
  author    = {Baevski, Alexei and Zhou, Henry and Mohamed, Abdelrahman and Auli, Michael},
  title     = {wav2vec 2.0: A framework for self-supervised learning of speech representations},
  booktitle = NIPS,
  year      = {2020},
}

% Acronym restored and braced against style downcasing.
@inproceedings{kahn2020libri,
  author    = {Kahn, Jacob and Rivi{\`e}re, Morgane and Zheng, Weiyi and Kharitonov, Evgeny and Xu, Qiantong and Mazar{\'e}, Pierre-Emmanuel and Karadayi, Julien and Liptchinsky, Vitaliy and Collobert, Ronan and Fuegen, Christian and others},
  title     = {Libri-light: A benchmark for {ASR} with limited or no supervision},
  booktitle = ICASSP,
  year      = {2020},
}

% Venue given through the shared CVPR macro so it renders like the other CVPR entries.
@inproceedings{pathak2017learning,
  author    = {Pathak, Deepak and Girshick, Ross and Doll{\'a}r, Piotr and Darrell, Trevor and Hariharan, Bharath},
  title     = {Learning features by watching objects move},
  booktitle = CVPR,
  pages     = {2701--2710},
  year      = {2017},
}

% Venue given through the shared ECCV macro; Springer is the publisher, not a sponsoring organization.
@inproceedings{noroozi2016unsupervised,
  author    = {Noroozi, Mehdi and Favaro, Paolo},
  title     = {Unsupervised learning of visual representations by solving jigsaw puzzles},
  booktitle = ECCV,
  pages     = {69--84},
  publisher = {Springer},
  year      = {2016},
}

@article{he2021masked,
  author  = {He, Kaiming and Chen, Xinlei and Xie, Saining and Li, Yanghao and Doll{\'a}r, Piotr and Girshick, Ross},
  title   = {Masked autoencoders are scalable vision learners},
  journal = {arXiv preprint arXiv:2111.06377},
  year    = {2021},
}

@article{brown2020language,
  author  = {Brown, Tom B and Mann, Benjamin and Ryder, Nick and Subbiah, Melanie and Kaplan, Jared and Dhariwal, Prafulla and Neelakantan, Arvind and Shyam, Pranav and Sastry, Girish and Askell, Amanda and others},
  title   = {Language models are few-shot learners},
  journal = {preprint arXiv:2005.14165},
  year    = {2020},
}

@article{raffel2019exploring,
  author  = {Raffel, Colin and Shazeer, Noam and Roberts, Adam and Lee, Katherine and Narang, Sharan and Matena, Michael and Zhou, Yanqi and Li, Wei and Liu, Peter J},
  title   = {Exploring the limits of transfer learning with a unified text-to-text transformer},
  journal = {preprint arXiv:1910.10683},
  year    = {2019},
}

% "van den" is the surname particle (von Last, First form); Eslami's initials are spaced out.
% NOTE(review): Eslami's given names expanded to "S. M. Ali" -- confirm against the paper.
@article{henaff2019data,
  author  = {H{\'e}naff, Olivier J and Srinivas, Aravind and De Fauw, Jeffrey and Razavi, Ali and Doersch, Carl and Eslami, S. M. Ali and van den Oord, Aaron},
  title   = {Data-efficient image recognition with contrastive predictive coding},
  journal = {preprint arXiv:1905.09272},
  year    = {2019},
}

% Acronym braced so sentence-casing styles do not lowercase it.
@inproceedings{kolesnikov2019big,
  author    = {Kolesnikov, Alexander and Beyer, Lucas and Zhai, Xiaohua and Puigcerver, Joan and Yung, Jessica and Gelly, Sylvain and Houlsby, Neil},
  title     = {Big transfer ({BiT}): General visual representation learning},
  booktitle = ECCV,
  year      = {2020},
}

@inproceedings{kolesnikov2019revisiting,
  author    = {Kolesnikov, Alexander and Zhai, Xiaohua and Beyer, Lucas},
  title     = {Revisiting self-supervised visual representation learning},
  booktitle = CVPR,
  year      = {2019},
}

@inproceedings{he2016deep,
  author    = {He, Kaiming and Zhang, Xiangyu and Ren, Shaoqing and Sun, Jian},
  title     = {Deep residual learning for image recognition},
  booktitle = CVPR,
  year      = {2016},
}

% Model name restored to its published casing and braced against style downcasing.
@article{tan2019efficientnet,
  author  = {Tan, Mingxing and Le, Quoc V},
  title   = {{EfficientNet}: Rethinking model scaling for convolutional neural networks},
  journal = {preprint arXiv:1905.11946},
  year    = {2019},
}

% Model name braced so sentence-casing styles do not lowercase it.
@article{touvron2020fixing,
  author  = {Touvron, Hugo and Vedaldi, Andrea and Douze, Matthijs and J{\'e}gou, Herv{\'e}},
  title   = {Fixing the train-test resolution discrepancy: {FixEfficientNet}},
  journal = {preprint arXiv:2003.08237},
  year    = {2020},
}

@inproceedings{xie2017aggregated,
  author    = {Xie, Saining and Girshick, Ross and Doll{\'a}r, Piotr and Tu, Zhuowen and He, Kaiming},
  title     = {Aggregated residual transformations for deep neural networks},
  booktitle = CVPR,
  year      = {2017},
}

@inproceedings{xie2016unsupervised,
  author    = {Xie, Junyuan and Girshick, Ross and Farhadi, Ali},
  title     = {Unsupervised deep embedding for clustering analysis},
  booktitle = ICML,
  year      = {2016},
}

@inproceedings{yang2016joint,
  author    = {Yang, Jianwei and Parikh, Devi and Batra, Dhruv},
  title     = {Joint unsupervised learning of deep representations and image clusters},
  booktitle = CVPR,
  year      = {2016},
}


@inproceedings{doersch2015unsupervised,
  author    = {Doersch, Carl and Gupta, Abhinav and Efros, Alexei A},
  title     = {Unsupervised visual representation learning by context prediction},
  booktitle = ICCV,
  year      = {2015},
}

@inproceedings{vincent2008extracting,
  title     = {Extracting and composing robust features with denoising autoencoders},
  author    = {P. Vincent and H. Larochelle and Y. Bengio and P.-A. Manzagol},
  year      = {2008},
  booktitle = ICML,
}

% ASCII apostrophe in the given name (the curly quote is non-ASCII and breaks classic BibTeX output).
@inproceedings{ranzato2007unsupervised,
  author    = {Ranzato, Marc'Aurelio and Huang, Fu-Jie and Boureau, Y-Lan and LeCun, Yann},
  title     = {Unsupervised Learning of Invariant Feature Hierarchies with Applications to Object Recognition},
  booktitle = CVPR,
  year      = {2007},
}

@inproceedings{hadsell2006dimensionality,
  author    = {Hadsell, Raia and Chopra, Sumit and LeCun, Yann},
  title     = {Dimensionality reduction by learning an invariant mapping},
  booktitle = CVPR,
  year      = {2006},
}

% NOTE(review): page range completed to 607--609 from the published article -- confirm.
@article{olshausen1996,
  author  = {B. A. Olshausen and D. J. Field},
  title   = {Emergence of simple-cell receptive field properties by learning a sparse code for natural images},
  journal = {Nature},
  volume  = {381},
  number  = {6583},
  pages   = {607--609},
  year    = {1996},
}

@inproceedings{coates2011analysis,
  author    = {Coates, Adam and Ng, Andrew and Lee, Honglak},
  title     = {An analysis of single-layer networks in unsupervised feature learning},
  booktitle = AISTATS,
  year      = {2011},
}

% Preprint with the same title as junnan2021prototypical; prefer citing only one of the two keys.
@article{li2020prototypical,
  author  = {Li, Junnan and Zhou, Pan and Xiong, Caiming and Socher, Richard and Hoi, Steven C. H.},
  title   = {Prototypical Contrastive Learning of Unsupervised Representations},
  journal = {preprint arXiv:2005.04966},
  year    = {2020},
}

@article{dosovitskiy2016discriminative,
  author  = {Dosovitskiy, Alexey and Fischer, Philipp and Springenberg, Jost Tobias and Riedmiller, Martin and Brox, Thomas},
  title   = {Discriminative unsupervised feature learning with exemplar convolutional neural networks},
  journal = {TPAMI},
  year    = {2016},
}

% "van den" is the surname particle (von Last, First form), not part of the given name.
@article{oord2018representation,
  author  = {van den Oord, Aaron and Li, Yazhe and Vinyals, Oriol},
  title   = {Representation learning with contrastive predictive coding},
  journal = {preprint arXiv:1807.03748},
  year    = {2018},
}

% Acronym and dataset name restored and braced against style downcasing.
@article{goyal2017accurate,
  author  = {Goyal, Priya and Doll{\'a}r, Piotr and Girshick, Ross and Noordhuis, Pieter and Wesolowski, Lukasz and Kyrola, Aapo and Tulloch, Andrew and Jia, Yangqing and He, Kaiming},
  title   = {Accurate, large minibatch {SGD}: Training {ImageNet} in 1 hour},
  journal = {preprint arXiv:1706.02677},
  year    = {2017},
}

% Dataset name restored and braced; stray space at the end of the author list removed.
@article{russakovsky2015imagenet,
  author  = {Russakovsky, Olga and Deng, Jia and Su, Hao and Krause, Jonathan and Satheesh, Sanjeev and Ma, Sean and Huang, Zhiheng and Karpathy, Andrej and Khosla, Aditya and Bernstein, Michael and Berg, Alexander C and Fei-Fei, Li},
  title   = {{ImageNet} large scale visual recognition challenge},
  journal = IJCV,
  year    = {2015},
}


@inproceedings{goyal2019scaling,
  author    = {Goyal, Priya and Mahajan, Dhruv and Gupta, Abhinav and Misra, Ishan},
  title     = {Scaling and benchmarking self-supervised visual representation learning},
  booktitle = ICCV,
  year      = {2019},
}

% Acronym braced so sentence-casing styles do not lowercase it.
@article{rajbhandari2019zero,
  author  = {Rajbhandari, Samyam and Rasley, Jeff and Ruwase, Olatunji and He, Yuxiong},
  title   = {{ZeRO}: Memory Optimizations Toward Training Trillion Parameter Models},
  journal = {preprint arXiv:1910.02054},
  year    = {2019},
}

@article{chen2016grad,
  title   = {Training Deep Nets with Sublinear Memory Cost},
  author  = {Tianqi Chen and Bing Xu and Chiyuan Zhang and Carlos Guestrin},
  year    = {2016},
  journal = {preprint arXiv:1604.06174},
}

% Method name restored to its published casing and braced against style downcasing.
@inproceedings{sohn2020fixmatch,
  author    = {Sohn, Kihyuk and Berthelot, David and Li, Chun-Liang and Zhang, Zizhao and Carlini, Nicholas and Cubuk, Ekin D and Kurakin, Alex and Zhang, Han and Raffel, Colin},
  title     = {{FixMatch}: Simplifying semi-supervised learning with consistency and confidence},
  booktitle = NIPS,
  year      = {2020},
}

@inproceedings{chen2020big,
  author    = {Chen, Ting and Kornblith, Simon and Swersky, Kevin and Norouzi, Mohammad and Hinton, Geoffrey},
  title     = {Big self-supervised models are strong semi-supervised learners},
  booktitle = NIPS,
  year      = {2020},
}

@article{xu2020self,
  author  = {Xu, Qiantong and Baevski, Alexei and Likhomanenko, Tatiana and Tomasello, Paden and Conneau, Alexis and Collobert, Ronan and Synnaeve, Gabriel and Auli, Michael},
  title   = {Self-training and Pre-training are Complementary for Speech Recognition},
  journal = {preprint arXiv:2010.11430},
  year    = {2020},
}

@article{xu2020iterative,
  author  = {Xu, Qiantong and Likhomanenko, Tatiana and Kahn, Jacob and Hannun, Awni and Synnaeve, Gabriel and Collobert, Ronan},
  title   = {Iterative pseudo-labeling for speech recognition},
  journal = {preprint arXiv:2005.09267},
  year    = {2020},
}


% Method name braced so sentence-casing styles keep its inner capital.
@article{french2020milking,
  author  = {French, Geoff and Oliver, Avital and Salimans, Tim},
  title   = {Milking {CowMask} for Semi-Supervised Image Classification},
  journal = {preprint arXiv:2003.12022},
  year    = {2020},
}

@article{dosovitskiy2020image,
  author  = {Dosovitskiy, Alexey and Beyer, Lucas and Kolesnikov, Alexander and Weissenborn, Dirk and Zhai, Xiaohua and Unterthiner, Thomas and Dehghani, Mostafa and Minderer, Matthias and Heigold, Georg and Gelly, Sylvain and others},
  title   = {An Image is Worth 16x16 Words: Transformers for Image Recognition at Scale},
  journal = {preprint arXiv:2010.11929},
  year    = {2020},
}

@inproceedings{sun2017revisiting,
  author    = {Sun, Chen and Shrivastava, Abhinav and Singh, Saurabh and Gupta, Abhinav},
  title     = {Revisiting unreasonable effectiveness of data in deep learning era},
  booktitle = ICCV,
  year      = {2017},
}

@inproceedings{hu2018squeeze,
  author    = {Hu, Jie and Shen, Li and Sun, Gang},
  title     = {Squeeze-and-excitation networks},
  booktitle = CVPR,
  year      = {2018},
}

% Dataset name restored to its published casing and braced against style recasing.
@inproceedings{van2018inaturalist,
  author    = {Van Horn, Grant and Mac Aodha, Oisin and Song, Yang and Cui, Yin and Sun, Chen and Shepard, Alex and Adam, Hartwig and Perona, Pietro and Belongie, Serge},
  title     = {The {iNaturalist} species classification and detection dataset},
  booktitle = CVPR,
  year      = {2018},
}

@article{kuznetsova2018open,
  author  = {Alina Kuznetsova and Mohamad Hassan Mohamad Rom and Neil Alldrin and Jasper Uijlings and Ivan Krasin and Jordi Pont-Tuset and Shahab Kamali and Stefan Popov and Matteo Malloci and Alexander Kolesnikov and Tom Duerig and Vittorio Ferrari},
  title   = {The Open Images Dataset V4: Unified image classification, object detection, and visual relationship detection at scale},
  journal = IJCV,
  year    = {2020},
}


@inproceedings{zhou2014learning,
  author    = {Zhou, Bolei and Lapedriza, Agata and Xiao, Jianxiong and Torralba, Antonio and Oliva, Aude},
  title     = {Learning deep features for scene recognition using places database},
  booktitle = NIPS,
  year      = {2014},
}

% Acronyms restored and braced against style downcasing; Williams' initials are spaced out.
@article{everingham2010pascal,
  author  = {Everingham, Mark and Van Gool, Luc and Williams, Christopher K. I. and Winn, John and Zisserman, Andrew},
  title   = {The {PASCAL} visual object classes ({VOC}) challenge},
  journal = IJCV,
  year    = {2010},
}

% Acronym restored and braced against style downcasing.
@article{loshchilov2016sgdr,
  author  = {Loshchilov, Ilya and Hutter, Frank},
  title   = {{SGDR}: Stochastic gradient descent with warm restarts},
  journal = {preprint arXiv:1608.03983},
  year    = {2016},
}

@article{du2020self,
  author  = {Du, Jingfei and Grave, Edouard and Gunel, Beliz and Chaudhary, Vishrav and Celebi, Onur and Auli, Michael and Stoyanov, Ves and Conneau, Alexis},
  title   = {Self-training improves pre-training for natural language understanding},
  journal = {preprint arXiv:2010.02194},
  year    = {2020},
}

@article{yalniz2019billion,
  author  = {Yalniz, I Zeki and J{\'e}gou, Herv{\'e} and Chen, Kan and Paluri, Manohar and Mahajan, Dhruv},
  title   = {Billion-scale semi-supervised learning for image classification},
  journal = {preprint arXiv:1905.00546},
  year    = {2019},
}

@article{zoph2020rethinking,
  author  = {Zoph, Barret and Ghiasi, Golnaz and Lin, Tsung-Yi and Cui, Yin and Liu, Hanxiao and Cubuk, Ekin D and Le, Quoc V},
  title   = {Rethinking pre-training and self-training},
  journal = {preprint arXiv:2006.06882},
  year    = {2020},
}

@article{zhang2020pushing,
  author  = {Zhang, Yu and Qin, James and Park, Daniel S and Han, Wei and Chiu, Chung-Cheng and Pang, Ruoming and Le, Quoc V and Wu, Yonghui},
  title   = {Pushing the Limits of Semi-Supervised Learning for Automatic Speech Recognition},
  journal = {preprint arXiv:2010.10504},
  year    = {2020},
}

% Acronym braced so sentence-casing styles do not lowercase it.
@article{richemond2020byol,
  author  = {Richemond, Pierre H and Grill, Jean-Bastien and Altch{\'e}, Florent and Tallec, Corentin and Strub, Florian and Brock, Andrew and Smith, Samuel and De, Soham and Pascanu, Razvan and Piot, Bilal and others},
  title   = {{BYOL} works even without batch statistics},
  journal = {preprint arXiv:2010.10241},
  year    = {2020},
}

% Accent LaTeX-escaped (a-breve) so classic 8-bit BibTeX sorts and prints the name correctly.
% NOTE(review): the source had a-caron; a-breve is the Romanian spelling -- confirm.
@inproceedings{bucilua2006model,
  author    = {Bucilu{\u{a}}, Cristian and Caruana, Rich and Niculescu-Mizil, Alexandru},
  title     = {Model compression},
  booktitle = {SIGKDD},
  year      = {2006},
}

@article{hinton2015distilling,
  author  = {Hinton, Geoffrey and Vinyals, Oriol and Dean, Jeff},
  title   = {Distilling the knowledge in a neural network},
  journal = {preprint arXiv:1503.02531},
  year    = {2015},
}

@article{tarvainen2017mean,
  author  = {Tarvainen, Antti and Valpola, Harri},
  title   = {Mean teachers are better role models: Weight-averaged consistency targets improve semi-supervised deep learning results},
  journal = {preprint arXiv:1703.01780},
  year    = {2017},
}

@article{touvron2020training,
  author  = {Touvron, Hugo and Cord, Matthieu and Douze, Matthijs and Massa, Francisco and Sablayrolles, Alexandre and J{\'e}gou, Herv{\'e}},
  title   = {Training data-efficient image transformers \& distillation through attention},
  journal = {preprint arXiv:2012.12877},
  year    = {2020},
}

@article{chen2020exploring,
  author  = {Chen, Xinlei and He, Kaiming},
  title   = {Exploring Simple Siamese Representation Learning},
  journal = {preprint arXiv:2011.10566},
  year    = {2020},
}

@article{chen2020improved,
  author  = {Chen, Xinlei and Fan, Haoqi and Girshick, Ross and He, Kaiming},
  title   = {Improved baselines with momentum contrastive learning},
  journal = {preprint arXiv:2003.04297},
  year    = {2020},
}


% Method name restored to its published casing and braced against style downcasing.
@article{bautista2016cliquecnn,
  author  = {Bautista, Miguel A and Sanakoyeu, Artsiom and Sutter, Ekaterina and Ommer, Bj{\"o}rn},
  title   = {{CliqueCNN}: Deep unsupervised exemplar learning},
  journal = {preprint arXiv:1608.08792},
  year    = {2016},
}

@inproceedings{huang2019unsupervised,
  author    = {Huang, Jiabo and Dong, Qi and Gong, Shaogang and Zhu, Xiatian},
  title     = {Unsupervised deep learning by neighbourhood discovery},
  booktitle = ICML,
  year      = {2019},
}

@inproceedings{zhuang2019local,
  author    = {Zhuang, Chengxu and Zhai, Alex Lin and Yamins, Daniel},
  title     = {Local aggregation for unsupervised learning of visual embeddings},
  booktitle = ICCV,
  year      = {2019},
}

% Venue given through the shared AISTATS macro so it renders like coates2011analysis.
@inproceedings{gutmann2010noise,
  author    = {Gutmann, Michael and Hyv{\"a}rinen, Aapo},
  title     = {Noise-contrastive estimation: A new estimation principle for unnormalized statistical models},
  booktitle = AISTATS,
  year      = {2010},
}

@article{ermolov2020whitening,
  author  = {Ermolov, Aleksandr and Siarohin, Aliaksandr and Sangineto, Enver and Sebe, Nicu},
  title   = {Whitening for self-supervised representation learning},
  journal = {preprint arXiv:2007.06346},
  year    = {2020},
}

% Method name braced so sentence-casing styles do not lowercase it.
@article{zbontar2021barlow,
  author  = {Zbontar, Jure and Jing, Li and Misra, Ishan and LeCun, Yann and Deny, St{\'e}phane},
  title   = {{Barlow} {Twins}: Self-Supervised Learning via Redundancy Reduction},
  journal = {arXiv preprint arXiv:2103.03230},
  year    = {2021},
}

% NOTE(review): listed as a single-author paper; "and others" looked like an export artifact -- confirm.
@inproceedings{lee2013pseudo,
  author    = {Lee, Dong-Hyun},
  title     = {Pseudo-label: The simple and efficient semi-supervised learning method for deep neural networks},
  booktitle = {Workshop on challenges in representation learning, ICML},
  year      = {2013},
}

% Method and dataset names restored and braced against style downcasing.
@inproceedings{xie2020self,
  author    = {Xie, Qizhe and Luong, Minh-Thang and Hovy, Eduard and Le, Quoc V},
  title     = {Self-training with {Noisy} {Student} improves {ImageNet} classification},
  booktitle = CVPR,
  year      = {2020},
}

@inproceedings{vaswani2017attention,
  author    = {Vaswani, Ashish and Shazeer, Noam and Parmar, Niki and Uszkoreit, Jakob and Jones, Llion and Gomez, Aidan N and Kaiser, {\L}ukasz and Polosukhin, Illia},
  title     = {Attention is all you need},
  booktitle = NIPS,
  year      = {2017},
}

@inproceedings{zhao2020exploring,
  author    = {Zhao, Hengshuang and Jia, Jiaya and Koltun, Vladlen},
  title     = {Exploring self-attention for image recognition},
  booktitle = CVPR,
  year      = {2020},
}

% Kaiser's given name is spelled as in vaswani2017attention so the same author renders consistently.
@inproceedings{parmar2018image,
  author    = {Parmar, Niki and Vaswani, Ashish and Uszkoreit, Jakob and Kaiser, {\L}ukasz and Shazeer, Noam and Ku, Alexander and Tran, Dustin},
  title     = {Image transformer},
  booktitle = ICML,
  year      = {2018},
}

@article{child2019generating,
  author  = {Child, Rewon and Gray, Scott and Radford, Alec and Sutskever, Ilya},
  title   = {Generating long sequences with sparse transformers},
  journal = {preprint arXiv:1904.10509},
  year    = {2019},
}

@article{hoffer2019mix,
  author  = {Hoffer, Elad and Weinstein, Berry and Hubara, Itay and Ben-Nun, Tal and Hoefler, Torsten and Soudry, Daniel},
  title   = {Mix \& Match: training convnets with mixed image sizes for improved accuracy, speed and scale resiliency},
  journal = {preprint arXiv:1908.08986},
  year    = {2019},
}

% Attention-based neural machine translation (arXiv preprint, 2014).
@article{bahdanau2014neural,
  author = {Bahdanau, Dzmitry and Cho, Kyunghyun and Bengio, Yoshua},
  title = {Neural machine translation by jointly learning to align and translate},
  journal = {preprint arXiv:1409.0473},
  year = {2014},
}

% OpenNMT toolkit (arXiv preprint, 2017).
@article{klein2017opennmt,
  author = {Klein, Guillaume and Kim, Yoon and Deng, Yuntian and Senellart, Jean and Rush, Alexander M},
  title = {Opennmt: Open-source toolkit for neural machine translation},
  journal = {preprint arXiv:1701.02810},
  year = {2017},
}

% Combining recent NMT advances (arXiv preprint, 2018).
@article{chen2018best,
  author = {Chen, Mia Xu and Firat, Orhan and Bapna, Ankur and Johnson, Melvin and Macherey, Wolfgang and Foster, George and Jones, Llion and Parmar, Niki and Schuster, Mike and Chen, Zhifeng and others},
  title = {The best of both worlds: Combining recent advances in neural machine translation},
  journal = {preprint arXiv:1804.09849},
  year = {2018},
}

% Network design spaces (CVPR 2020).
@inproceedings{radosavovic2020designing,
  author = {Radosavovic, Ilija and Kosaraju, Raj Prateek and Girshick, Ross and He, Kaiming and Doll{\'a}r, Piotr},
  title = {Designing network design spaces},
  booktitle = CVPR,
  year = {2020},
}

% Weight decay fix for Adam. The entry had no venue at all, which leaves the
% required journal field of an article empty; the arXiv identifier of the
% preprint is supplied, and the optimizer name is brace-protected.
@article{loshchilov2018fixing,
  author  = {Loshchilov, Ilya and Hutter, Frank},
  title   = {Fixing weight decay regularization in {Adam}},
  journal = {arXiv preprint arXiv:1711.05101},
  year    = {2018}
}

% Image colorization (ECCV 2016).
@inproceedings{zhang2016colorful,
  author = {Zhang, Richard and Isola, Phillip and Efros, Alexei A},
  title = {Colorful image colorization},
  booktitle = ECCV,
  year = {2016},
}

% Good views for contrastive learning (NeurIPS 2020). This is a conference
% paper, so it is typed as inproceedings with the venue in booktitle, matching
% the other NeurIPS entries in this file; the question mark that is part of
% the title is restored.
@inproceedings{tian2020makes,
  author    = {Tian, Yonglong and Sun, Chen and Poole, Ben and Krishnan, Dilip and Schmid, Cordelia and Isola, Phillip},
  title     = {What makes for good views for contrastive learning?},
  booktitle = NIPS,
  year      = {2020}
}

% Weight normalization (NeurIPS 2016). This is a conference paper, so it is
% typed as inproceedings with the venue in booktitle, matching the other
% NeurIPS entries in this file.
@inproceedings{salimans2016weight,
  author    = {Salimans, Tim and Kingma, Diederik P},
  title     = {Weight normalization: A simple reparameterization to accelerate training of deep neural networks},
  booktitle = NIPS,
  year      = {2016}
}

% Non-asymptotic analysis of stochastic approximation (NeurIPS 2011).
@inproceedings{bach2011non,
  author = {Bach, Francis and Moulines, Eric},
  title = {Non-asymptotic analysis of stochastic approximation algorithms for machine learning},
  booktitle = NIPS,
  year = {2011},
}

% Ruppert averaging technical report (1988). A techreport entry requires an
% institution; it was missing and is supplied here.
@techreport{ruppert1988efficient,
  author      = {Ruppert, David},
  title       = {Efficient estimations from a slowly convergent {Robbins-Monro} process},
  institution = {Cornell University Operations Research and Industrial Engineering},
  year        = {1988}
}

% Weight averaging for wider optima (arXiv preprint, 2018).
@article{izmailov2018averaging,
  author = {Izmailov, Pavel and Podoprikhin, Dmitrii and Garipov, Timur and Vetrov, Dmitry and Wilson, Andrew Gordon},
  title = {Averaging weights leads to wider optima and better generalization},
  journal = {preprint arXiv:1803.05407},
  year = {2018},
}

% Very large target vocabulary for NMT (arXiv preprint, 2014).
@article{jean2014using,
  author = {Jean, S{\'e}bastien and Cho, Kyunghyun and Memisevic, Roland and Bengio, Yoshua},
  title = {On using very large target vocabulary for neural machine translation},
  journal = {preprint arXiv:1412.2007},
  year = {2014},
}

% MAST self-supervised tracker (CVPR 2020).
@inproceedings{lai2020mast,
  author = {Lai, Zihang and Lu, Erika and Xie, Weidi},
  title = {MAST: A memory-augmented self-supervised tracker},
  booktitle = CVPR,
  year = {2020},
}

% Space-time memory networks for video object segmentation (ICCV 2019).
@inproceedings{oh2019video,
  author = {Oh, Seoung Wug and Lee, Joon-Young and Xu, Ning and Kim, Seon Joo},
  title = {Video object segmentation using space-time memory networks},
  booktitle = ICCV,
  year = {2019},
}

% Correspondence from cycle-consistency of time (CVPR 2019).
@inproceedings{wang2019learning,
  author = {Wang, Xiaolong and Jabri, Allan and Efros, Alexei A},
  title = {Learning correspondence from the cycle-consistency of time},
  booktitle = CVPR,
  year = {2019},
}

% Attribution guided factorization (arXiv preprint, 2020).
@article{gur2020visualization,
  author = {Gur, Shir and Ali, Ameen and Wolf, Lior},
  title = {Visualization of Supervised and Self-Supervised Neural Networks via Attribution Guided Factorization},
  journal = {preprint arXiv:2012.02166},
  year = {2020},
}

% Self-supervised pretraining in the wild (arXiv preprint, 2021).
@article{goyal2021self,
  author = {Goyal, Priya and Caron, Mathilde and Lefaudeux, Benjamin and Xu, Min and Wang, Pengchao and Pai, Vivek and Singh, Mannat and Liptchinsky, Vitaliy and Misra, Ishan and Joulin, Armand and others},
  title = {Self-supervised Pretraining of Visual Features in the Wild},
  journal = {preprint arXiv:2103.01988},
  year = {2021},
}

% Vision transformers for image retrieval (arXiv preprint, 2021).
@article{el2021training,
  author = {El-Nouby, Alaaeldin and Neverova, Natalia and Laptev, Ivan and J{\'e}gou, Herv{\'e}},
  title = {Training Vision Transformers for Image Retrieval},
  journal = {preprint arXiv:2102.05644},
  year = {2021},
}

% 2017 DAVIS video object segmentation challenge (arXiv preprint, 2017).
@article{pont20172017,
  author = {Pont-Tuset, Jordi and Perazzi, Federico and Caelles, Sergi and Arbel{\'a}ez, Pablo and Sorkine-Hornung, Alex and Van Gool, Luc},
  title = {The 2017 davis challenge on video object segmentation},
  journal = {preprint arXiv:1704.00675},
  year = {2017},
}

% Sinkhorn distances for optimal transport (NeurIPS 2013).
@inproceedings{cuturi2013sinkhorn,
  author = {Cuturi, Marco},
  title = {Sinkhorn distances: Lightspeed computation of optimal transport},
  booktitle = NIPS,
  year = {2013},
}

% Flower classification over many classes (2008).
@inproceedings{nilsback2008automated,
  author = {Nilsback, Maria-Elena and Zisserman, Andrew},
  title = {Automated flower classification over a large number of classes},
  booktitle = {2008 Sixth Indian Conference on Computer Vision, Graphics \& Image Processing},
  year = {2008},
}

% Meta pseudo labels (arXiv preprint, 2020).
@article{pham2020meta,
  author = {Pham, Hieu and Xie, Qizhe and Dai, Zihang and Le, Quoc V},
  title = {Meta pseudo labels},
  journal = {preprint arXiv:2003.10580},
  year = {2020},
}

% Unsupervised data augmentation for consistency training. The second author
% was written with the surname repeated as a given name; corrected to match
% the spelling of the same author in pham2020meta.
@article{xie2020unsupervised,
  author  = {Xie, Qizhe and Dai, Zihang and Hovy, Eduard and Luong, Minh-Thang and Le, Quoc V.},
  title   = {Unsupervised Data Augmentation for Consistency Training},
  journal = {preprint arXiv:1904.12848},
  year    = {2020}
}

% Contrastive semi-supervised learning of visual representations (arXiv preprint, 2020).
@article{assran2020recovering,
  author = {Assran, Mahmoud and Ballas, Nicolas and Castrejon, Lluis and Rabbat, Michael},
  title = {Recovering Petaflops in Contrastive Semi-Supervised Learning of Visual Representations},
  journal = {preprint arXiv:2006.10803},
  year = {2020},
}

% Split-brain autoencoders (2017).
@inproceedings{zhang2017split,
  author = {Zhang, Richard and Isola, Phillip and Efros, Alexei A},
  title = {Split-brain autoencoders: Unsupervised learning by cross-channel prediction},
  booktitle = {Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition},
  year = {2017},
  pages = {1058--1067},
}

% Self-supervised GANs with an auxiliary rotation loss (2019).
% The venue uses the CVPR string macro defined at the top of the file, which
% expands to the same text as the former literal.
@inproceedings{chen2019self,
  author = {Chen, Ting and Zhai, Xiaohua and Ritter, Marvin and Lucic, Mario and Houlsby, Neil},
  title = {Self-supervised gans via auxiliary rotation loss},
  booktitle = CVPR,
  year = {2019},
  pages = {12154--12163},
}

% Online distillation for distributed training (arXiv preprint, 2018).
@article{anil2018large,
  author = {Anil, Rohan and Pereyra, Gabriel and Passos, Alexandre and Ormandi, Robert and Dahl, George E and Hinton, Geoffrey E},
  title = {Large scale distributed neural network training through online distillation},
  journal = {arXiv preprint arXiv:1804.03235},
  year = {2018},
}


% Context encoders (2016). The umlauts in the second author's surname were
% lost in the export; they are restored as BibTeX special characters so the
% name prints and sorts correctly.
@inproceedings{pathak2016context,
  author    = {Pathak, Deepak and Kr{\"a}henb{\"u}hl, Philipp and Donahue, Jeff and Darrell, Trevor and Efros, Alexei A},
  title     = {Context encoders: Feature learning by inpainting},
  booktitle = {Proceedings of the IEEE conference on computer vision and pattern recognition},
  pages     = {2536--2544},
  year      = {2016}
}

% Data-efficient CPC (ICML 2020). The exported entry listed only the first
% author; the full author list of the paper is restored so that citations
% render as a multi-author work.
@inproceedings{henaff2020data,
  author       = {H{\'e}naff, Olivier J. and Srinivas, Aravind and De Fauw, Jeffrey and Razavi, Ali and Doersch, Carl and Eslami, S. M. Ali and van den Oord, Aaron},
  title        = {Data-efficient image recognition with contrastive predictive coding},
  booktitle    = {International Conference on Machine Learning},
  pages        = {4182--4192},
  year         = {2020},
  organization = {PMLR}
}

% Simple siamese representation learning (2021).
@inproceedings{chen2021exploring,
  author = {Chen, Xinlei and He, Kaiming},
  title = {Exploring simple siamese representation learning},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition},
  year = {2021},
  pages = {15750--15758},
}

% VICReg (arXiv preprint, 2021).
@article{bardes2021vicreg,
  author = {Bardes, Adrien and Ponce, Jean and LeCun, Yann},
  title = {Vicreg: Variance-invariance-covariance regularization for self-supervised learning},
  journal = {arXiv preprint arXiv:2105.04906},
  year = {2021},
}

% self-supervised learning theory
% yuandong papers 
% SSL dynamics without contrastive pairs (arXiv preprint, 2021).
@article{tian2021understanding,
  author = {Tian, Yuandong and Chen, Xinlei and Ganguli, Surya},
  title = {Understanding self-supervised learning dynamics without contrastive pairs},
  journal = {arXiv preprint arXiv:2102.06810},
  year = {2021},
}
% Dimensional collapse in contrastive SSL (arXiv preprint, 2021).
@article{jing2021understanding,
  author = {Jing, Li and Vincent, Pascal and LeCun, Yann and Tian, Yuandong},
  title = {Understanding dimensional collapse in contrastive self-supervised learning},
  journal = {arXiv preprint arXiv:2110.09348},
  year = {2021},
}
% Non-contrastive self-supervision theory (arXiv preprint, 2021).
@article{wang2021towards,
  author = {Wang, Xiang and Chen, Xinlei and Du, Simon S and Tian, Yuandong},
  title = {Towards Demystifying Representation Learning with Non-contrastive Self-supervision},
  journal = {arXiv preprint arXiv:2110.04947},
  year = {2021},
}

% MoCo-v3
% Training self-supervised vision transformers (2021).
@inproceedings{chen2021empirical,
  author = {Chen, Xinlei and Xie, Saining and He, Kaiming},
  title = {An empirical study of training self-supervised vision transformers},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision},
  year = {2021},
  pages = {9640--9649},
}

% Deep double descent (arXiv preprint, 2019). The stray ".(2019)" that the
% export appended to the title is removed; the year already lives in its own
% field.
@article{nakkiran2019deep,
  author  = {Nakkiran, Preetum and Kaplun, Gal and Bansal, Yamini and Yang, Tristan and Barak, Boaz and Sutskever, Ilya},
  title   = {Deep Double Descent: Where Bigger Models and More Data Hurt},
  journal = {arXiv preprint arXiv:1912.02292},
  year    = {2019}
}

% Intrinsic dimension of objective landscapes (arXiv preprint, 2018).
@article{li2018measuring,
  author = {Li, Chunyuan and Farkhoor, Heerad and Liu, Rosanne and Yosinski, Jason},
  title = {Measuring the intrinsic dimension of objective landscapes},
  journal = {arXiv preprint arXiv:1804.08838},
  year = {2018},
}
% datasets: CIFAR-10, ImageNet

% CIFAR technical report (2009). The work is a technical report, not a
% journal article, so it is typed as techreport with the required institution
% field. The author list follows the citation published with the dataset
% (single author); the exported "and others" was spurious.
@techreport{Krizhevsky09learningmultiple,
  author      = {Krizhevsky, Alex},
  title       = {Learning multiple layers of features from tiny images},
  institution = {University of Toronto},
  year        = {2009}
}

% ImageNet dataset paper (2009). The organization is an acronym and is written
% in capitals, as in parkhi2012cats; the dataset name is brace-protected so
% sentence-casing styles keep its capitalisation.
@inproceedings{deng2009imagenet,
  author       = {Deng, Jia and Dong, Wei and Socher, Richard and Li, Li-Jia and Li, Kai and Fei-Fei, Li},
  title        = {{ImageNet}: A large-scale hierarchical image database},
  booktitle    = {2009 IEEE conference on computer vision and pattern recognition},
  pages        = {248--255},
  year         = {2009},
  organization = {IEEE}
}
% papers cited by yuandong paper (eg sanjeev)

% continual learning 
% Catastrophic forgetting in connectionist networks (1999).
@article{french1999catastrophic,
  author = {French, Robert M},
  title = {Catastrophic forgetting in connectionist networks},
  journal = {Trends in cognitive sciences},
  year = {1999},
  volume = {3},
  number = {4},
  pages = {128--135},
  publisher = {Elsevier},
}
% Overcoming catastrophic forgetting (2017).
@article{kirkpatrick2017overcoming,
  author = {Kirkpatrick, James and Pascanu, Razvan and Rabinowitz, Neil and Veness, Joel and Desjardins, Guillaume and Rusu, Andrei A and Milan, Kieran and Quan, John and Ramalho, Tiago and Grabska-Barwinska, Agnieszka and others},
  title = {Overcoming catastrophic forgetting in neural networks},
  journal = {Proceedings of the national academy of sciences},
  year = {2017},
  volume = {114},
  number = {13},
  pages = {3521--3526},
  publisher = {National Acad Sciences},
}
% Batch normalization (2015).
@inproceedings{ioffe2015batch,
  author = {Ioffe, Sergey and Szegedy, Christian},
  title = {Batch normalization: Accelerating deep network training by reducing internal covariate shift},
  booktitle = {International conference on machine learning},
  year = {2015},
  pages = {448--456},
  organization = {PMLR},
}
% Flattening sharpness for continual learning (2021).
@article{deng2021flattening,
  author = {Deng, Danruo and Chen, Guangyong and Hao, Jianye and Wang, Qiong and Heng, Pheng-Ann},
  title = {Flattening Sharpness for Dynamic Gradient Projection Memory Benefits Continual Learning},
  journal = {Advances in Neural Information Processing Systems},
  year = {2021},
  volume = {34},
}
% iCaRL incremental classifier (2017).
@inproceedings{rebuffi2017icarl,
  author = {Rebuffi, Sylvestre-Alvise and Kolesnikov, Alexander and Sperl, Georg and Lampert, Christoph H},
  title = {icarl: Incremental classifier and representation learning},
  booktitle = {Proceedings of the IEEE conference on Computer Vision and Pattern Recognition},
  year = {2017},
  pages = {2001--2010},
}
% Gradient episodic memory (2017).
@article{lopez2017gradient,
  author = {Lopez-Paz, David and Ranzato, Marc'Aurelio},
  title = {Gradient episodic memory for continual learning},
  journal = {Advances in neural information processing systems},
  year = {2017},
  volume = {30},
}
% Invariances, augmentations and dataset biases in contrastive SSL (2020).
@article{purushwalkam2020demystifying,
  author = {Purushwalkam, Senthil and Gupta, Abhinav},
  title = {Demystifying contrastive self-supervised learning: Invariances, augmentations and dataset biases},
  journal = {Advances in Neural Information Processing Systems},
  year = {2020},
  volume = {33},
  pages = {3407--3418},
}
% Contrastive learning as (almost) PCA (arXiv preprint, 2022).
@article{tian2022deep,
  author = {Tian, Yuandong},
  title = {Deep Contrastive Learning is Provably (almost) Principal Component Analysis},
  journal = {arXiv preprint arXiv:2201.12680},
  year = {2022},
}

% Curious representation learning (2021).
@inproceedings{du2021curious,
  author = {Du, Yilun and Gan, Chuang and Isola, Phillip},
  title = {Curious representation learning for embodied intelligence},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision},
  year = {2021},
  pages = {10408--10417},
}
% Focused web crawling with reinforcement learning (arXiv preprint, 2021).
@article{kontogiannis2021tree,
  author = {Kontogiannis, Andreas and Kelesis, Dimitrios and Pollatos, Vasilis and Paliouras, Georgios and Giannakopoulos, George},
  title = {Tree-based Focused Web Crawling with Reinforcement Learning},
  journal = {arXiv preprint arXiv:2112.07620},
  year = {2021},
}
% NEIL: visual knowledge from web data (2013).
@inproceedings{chen2013neil,
  author = {Chen, Xinlei and Shrivastava, Abhinav and Gupta, Abhinav},
  title = {Neil: Extracting visual knowledge from web data},
  booktitle = {Proceedings of the IEEE international conference on computer vision},
  year = {2013},
  pages = {1409--1416},
}
% Never-ending language learning architecture (2010).
@inproceedings{carlson2010toward,
  author = {Carlson, Andrew and Betteridge, Justin and Kisiel, Bryan and Settles, Burr and Hruschka, Estevam R and Mitchell, Tom M},
  title = {Toward an architecture for never-ending language learning},
  booktitle = {Twenty-Fourth AAAI conference on artificial intelligence},
  year = {2010},
}
% Never-ending learning (2018).
@article{mitchell2018never,
  author = {Mitchell, Tom and Cohen, William and Hruschka, Estevam and Talukdar, Partha and Yang, Bishan and Betteridge, Justin and Carlson, Andrew and Dalvi, Bhavana and Gardner, Matt and Kisiel, Bryan and others},
  title = {Never-ending learning},
  journal = {Communications of the ACM},
  year = {2018},
  volume = {61},
  number = {5},
  pages = {103--115},
  publisher = {ACM New York, NY, USA},
}

% WebGPT (arXiv preprint, 2021).
@article{nakano2021webgpt,
  author = {Nakano, Reiichiro and Hilton, Jacob and Balaji, Suchir and Wu, Jeff and Ouyang, Long and Kim, Christina and Hesse, Christopher and Jain, Shantanu and Kosaraju, Vineet and Saunders, William and others},
  title = {WebGPT: Browser-assisted question-answering with human feedback},
  journal = {arXiv preprint arXiv:2112.09332},
  year = {2021},
}

% Summarization with human feedback (2020).
@article{stiennon2020learning,
  author = {Stiennon, Nisan and Ouyang, Long and Wu, Jeffrey and Ziegler, Daniel and Lowe, Ryan and Voss, Chelsea and Radford, Alec and Amodei, Dario and Christiano, Paul F},
  title = {Learning to summarize with human feedback},
  journal = {Advances in Neural Information Processing Systems},
  year = {2020},
  volume = {33},
  pages = {3008--3021},
}
% Fine-tuning language models from human preferences (arXiv preprint, 2019).
@article{ziegler2019fine,
  author = {Ziegler, Daniel M and Stiennon, Nisan and Wu, Jeffrey and Brown, Tom B and Radford, Alec and Amodei, Dario and Christiano, Paul and Irving, Geoffrey},
  title = {Fine-tuning language models from human preferences},
  journal = {arXiv preprint arXiv:1909.08593},
  year = {2019},
}
% WordNet lexical database (1995).
@article{miller1995wordnet,
  author = {Miller, George A},
  title = {WordNet: a lexical database for English},
  journal = {Communications of the ACM},
  year = {1995},
  volume = {38},
  number = {11},
  pages = {39--41},
  publisher = {ACM New York, NY, USA},
}
% Visual models from natural language supervision (2021).
@inproceedings{radford2021learning,
  author = {Radford, Alec and Kim, Jong Wook and Hallacy, Chris and Ramesh, Aditya and Goh, Gabriel and Agarwal, Sandhini and Sastry, Girish and Askell, Amanda and Mishkin, Pamela and Clark, Jack and others},
  title = {Learning transferable visual models from natural language supervision},
  booktitle = {International Conference on Machine Learning},
  year = {2021},
  pages = {8748--8763},
  organization = {PMLR},
}

% Active learning literature survey (2009). The work is a university
% technical report, so it is typed as techreport: the issuing department moves
% from publisher to the required institution field and the report number is
% recorded. As an article the entry had no journal.
@techreport{settles2009active,
  author      = {Settles, Burr},
  title       = {Active learning literature survey},
  institution = {University of Wisconsin-Madison Department of Computer Sciences},
  type        = {Computer Sciences Technical Report},
  number      = {1648},
  year        = {2009}
}

% Sentence-BERT (arXiv preprint, 2019).
@article{reimers2019sentence,
  author = {Reimers, Nils and Gurevych, Iryna},
  title = {Sentence-bert: Sentence embeddings using siamese bert-networks},
  journal = {arXiv preprint arXiv:1908.10084},
  year = {2019},
}
% Transfer performance of ImageNet models (2019).
@inproceedings{kornblith2019better,
  author = {Kornblith, Simon and Shlens, Jonathon and Le, Quoc V},
  title = {Do better imagenet models transfer better?},
  booktitle = {Proceedings of the IEEE/CVF conference on computer vision and pattern recognition},
  year = {2019},
  pages = {2661--2671},
}

% Masked autoencoders (2022).
@inproceedings{he2022masked,
  author = {He, Kaiming and Chen, Xinlei and Xie, Saining and Li, Yanghao and Doll{\'a}r, Piotr and Girshick, Ross},
  title = {Masked autoencoders are scalable vision learners},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition},
  year = {2022},
  pages = {16000--16009},
}

% Collapse in non-contrastive siamese learning (2022).
@inproceedings{li2022understanding,
  author = {Li, Alexander C and Efros, Alexei A and Pathak, Deepak},
  title = {Understanding Collapse in Non-Contrastive Siamese Representation Learning},
  booktitle = {European Conference on Computer Vision},
  year = {2022},
  pages = {490--505},
  organization = {Springer},
}
% BEiT (arXiv preprint, 2021).
@article{bao2021beit,
  author = {Bao, Hangbo and Dong, Li and Wei, Furu},
  title = {Beit: Bert pre-training of image transformers},
  journal = {arXiv preprint arXiv:2106.08254},
  year = {2021},
}
% Emerging properties in self-supervised vision transformers (2021).
@inproceedings{caron2021emerging,
  author = {Caron, Mathilde and Touvron, Hugo and Misra, Ishan and J{\'e}gou, Herv{\'e} and Mairal, Julien and Bojanowski, Piotr and Joulin, Armand},
  title = {Emerging properties in self-supervised vision transformers},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision},
  year = {2021},
  pages = {9650--9660},
}
% GitHub repository: fork of Google Images Download, pinned to a commit.
@misc{Joeclinton1,
  author = {Clinton, Joe},
  title = {Google Images Download (fork)},
  howpublished = {\url{https://github.com/Joeclinton1/google-images-download}},
  year = {2020},
  publisher = {GitHub},
  journal = {GitHub repository},
  commit = {e91e6a38ad654877f59edc7894822f5319d75df1},
}

% GitHub repository: Google Images Download.
@misc{hardikvasa,
  author = {Vasa, Hardik},
  title = {Google Images Download},
  howpublished = {\url{https://github.com/hardikvasa/google-images-download}},
  year = {2015},
  publisher = {GitHub},
  journal = {GitHub repository},
}

% Gaussian processes for regression (1995).
@article{williams1995gaussian,
  author = {Williams, Christopher and Rasmussen, Carl},
  title = {Gaussian processes for regression},
  journal = {Advances in neural information processing systems},
  year = {1995},
  volume = {8},
}

% Datamodels (arXiv preprint, 2022).
@article{ilyas2022datamodels,
  author = {Ilyas, Andrew and Park, Sung Min and Engstrom, Logan and Leclerc, Guillaume and Madry, Aleksander},
  title = {Datamodels: Predicting predictions from training data},
  journal = {arXiv preprint arXiv:2202.00622},
  year = {2022},
}
% Influence functions for black-box predictions (2017).
@inproceedings{koh2017understanding,
  author = {Koh, Pang Wei and Liang, Percy},
  title = {Understanding black-box predictions via influence functions},
  booktitle = {International conference on machine learning},
  year = {2017},
  pages = {1885--1894},
  organization = {PMLR},
}
% Finding important examples early in training (2021).
@article{paul2021deep,
  author = {Paul, Mansheej and Ganguli, Surya and Dziugaite, Gintare Karolina},
  title = {Deep learning on a data diet: Finding important examples early in training},
  journal = {Advances in Neural Information Processing Systems},
  year = {2021},
  volume = {34},
  pages = {20596--20607},
}
% Memorization and the long tail via influence estimation (2020).
@article{feldman2020neural,
  author = {Feldman, Vitaly and Zhang, Chiyuan},
  title = {What neural networks memorize and why: Discovering the long tail via influence estimation},
  journal = {Advances in Neural Information Processing Systems},
  year = {2020},
  volume = {33},
  pages = {2881--2891},
}
% hard negatives 
% Contrastive learning with hard negatives (arXiv preprint, 2020).
@article{robinson2020contrastive,
  author = {Robinson, Joshua and Chuang, Ching-Yao and Sra, Suvrit and Jegelka, Stefanie},
  title = {Contrastive learning with hard negative samples},
  journal = {arXiv preprint arXiv:2010.04592},
  year = {2020},
}
% FaceNet (2015).
@inproceedings{schroff2015facenet,
  author = {Schroff, Florian and Kalenichenko, Dmitry and Philbin, James},
  title = {Facenet: A unified embedding for face recognition and clustering},
  booktitle = {Proceedings of the IEEE conference on computer vision and pattern recognition},
  year = {2015},
  pages = {815--823},
}
% Lifted structured feature embedding (2016). The first author's name was
% split incorrectly by the export ("Oh Song" as surname); the surname is Song
% and the given name is Hyun Oh. The citation key is left unchanged so
% existing cite commands keep working.
@inproceedings{oh2016deep,
  author    = {Song, Hyun Oh and Xiang, Yu and Jegelka, Stefanie and Savarese, Silvio},
  title     = {Deep metric learning via lifted structured feature embedding},
  booktitle = {Proceedings of the IEEE conference on computer vision and pattern recognition},
  pages     = {4004--4012},
  year      = {2016}
}
% Smart mining for deep metric learning (2017).
@inproceedings{harwood2017smart,
  author = {Harwood, Ben and Kumar BG, Vijay and Carneiro, Gustavo and Reid, Ian and Drummond, Tom},
  title = {Smart mining for deep metric learning},
  booktitle = {Proceedings of the IEEE International Conference on Computer Vision},
  year = {2017},
  pages = {2821--2829},
}
% Sampling in deep embedding learning (2017). The umlauts in the last author's
% surname were lost in the export; they are restored as BibTeX special
% characters so the name prints and sorts correctly.
@inproceedings{wu2017sampling,
  author    = {Wu, Chao-Yuan and Manmatha, R and Smola, Alexander J and Kr{\"a}henb{\"u}hl, Philipp},
  title     = {Sampling matters in deep embedding learning},
  booktitle = {Proceedings of the IEEE international conference on computer vision},
  pages     = {2840--2848},
  year      = {2017}
}
% Hierarchical triplet loss (ECCV 2018). The exported entry listed only the
% first author; the full author list of the paper is restored.
@inproceedings{ge2018deep,
  author    = {Ge, Weifeng and Huang, Weilin and Dong, Dengke and Scott, Matthew R.},
  title     = {Deep metric learning with hierarchical triplet loss},
  booktitle = {Proceedings of the European Conference on Computer Vision (ECCV)},
  pages     = {269--285},
  year      = {2018}
}
% Open-world sampling for imbalanced contrastive learning (2021).
@article{jiang2021improving,
  author = {Jiang, Ziyu and Chen, Tianlong and Chen, Ting and Wang, Zhangyang},
  title = {Improving contrastive learning on imbalanced data via open-world sampling},
  journal = {Advances in Neural Information Processing Systems},
  year = {2021},
  volume = {34},
  pages = {5997--6009},
}

% Food-101 dataset paper (2014). The last author's surname is the compound
% "Van Gool"; the export split it as surname "Gool" with "Van" in the given
% name. It is written the same way as in pont20172017.
@inproceedings{bossard2014food,
  author       = {Bossard, Lukas and Guillaumin, Matthieu and Van Gool, Luc},
  title        = {Food-101--mining discriminative components with random forests},
  booktitle    = {European conference on computer vision},
  pages        = {446--461},
  year         = {2014},
  organization = {Springer}
}
% Birdsnap dataset paper (2014). The third author's name was split incorrectly
% by the export ("Woo Lee" as surname); the surname is Lee and the given name
% is Seung Woo.
@inproceedings{berg2014birdsnap,
  author    = {Berg, Thomas and Liu, Jiongxin and Lee, Seung Woo and Alexander, Michelle L and Jacobs, David W and Belhumeur, Peter N},
  title     = {Birdsnap: Large-scale fine-grained visual categorization of birds},
  booktitle = {Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition},
  pages     = {2011--2018},
  year      = {2014}
}
% Cats and dogs dataset paper (2012). The last author's initials were glued
% together as a single token, which abbreviating styles would shorten to one
% letter; they are separated so both initials survive.
@inproceedings{parkhi2012cats,
  author       = {Parkhi, Omkar M and Vedaldi, Andrea and Zisserman, Andrew and Jawahar, C. V.},
  title        = {Cats and dogs},
  booktitle    = {2012 IEEE conference on computer vision and pattern recognition},
  pages        = {3498--3505},
  year         = {2012},
  organization = {IEEE}
}

% GPT-J-6B model release, cited via its GitHub repository.
@misc{gpt-j,
  title = {{GPT-J-6B: A 6 Billion Parameter Autoregressive Language Model}},
  author = {Wang, Ben and Komatsuzaki, Aran},
  howpublished = {\url{https://github.com/kingoflolz/mesh-transformer-jax}},
  month = may,
  year = {2021},
}

% LAION-400M dataset (arXiv preprint, 2021).
@article{schuhmann2021laion,
  author = {Schuhmann, Christoph and Vencu, Richard and Beaumont, Romain and Kaczmarczyk, Robert and Mullis, Clayton and Katta, Aarush and Coombes, Theo and Jitsev, Jenia and Komatsuzaki, Aran},
  title = {Laion-400m: Open dataset of clip-filtered 400 million image-text pairs},
  journal = {arXiv preprint arXiv:2111.02114},
  year = {2021},
}
% Canonical views from internet image collections (2012).
@article{mezuman2012learning,
  author = {Mezuman, Elad and Weiss, Yair},
  title = {Learning about canonical views from internet image collections},
  journal = {Advances in neural information processing systems},
  year = {2012},
  volume = {25},
}

% data2vec (arXiv preprint, 2022).
@article{baevski2022data2vec,
  author = {Baevski, Alexei and Hsu, Wei-Ning and Xu, Qiantong and Babu, Arun and Gu, Jiatao and Auli, Michael},
  title = {Data2vec: A general framework for self-supervised learning in speech, vision and language},
  journal = {arXiv preprint arXiv:2202.03555},
  year = {2022},
}

% Masked siamese networks (arXiv preprint, 2022).
@article{assran2022masked,
  author = {Assran, Mahmoud and Caron, Mathilde and Misra, Ishan and Bojanowski, Piotr and Bordes, Florian and Vincent, Pascal and Joulin, Armand and Rabbat, Michael and Ballas, Nicolas},
  title = {Masked siamese networks for label-efficient learning},
  journal = {arXiv preprint arXiv:2204.07141},
  year = {2022},
}
% Webly supervised concept expansion (arXiv preprint, 2022).
@article{kamath2022webly,
  author = {Kamath, Amita and Clark, Christopher and Gupta, Tanmay and Kolve, Eric and Hoiem, Derek and Kembhavi, Aniruddha},
  title = {Webly Supervised Concept Expansion for General Purpose Vision Models},
  journal = {arXiv preprint arXiv:2202.02317},
  year = {2022},
}
% Webly supervised learning of convolutional networks (2015).
@inproceedings{chen2015webly,
  author = {Chen, Xinlei and Gupta, Abhinav},
  title = {Webly supervised learning of convolutional networks},
  booktitle = {Proceedings of the IEEE international conference on computer vision},
  year = {2015},
  pages = {1431--1439},
}
% Dyna architecture (1991).
@article{sutton1991dyna,
  author = {Sutton, Richard S},
  title = {Dyna, an integrated architecture for learning, planning, and reacting},
  journal = {ACM Sigart Bulletin},
  year = {1991},
  volume = {2},
  number = {4},
  pages = {160--163},
  publisher = {ACM New York, NY, USA},
}
% Billion-scale similarity search with GPUs (2019).
@article{johnson2019billion,
  author = {Johnson, Jeff and Douze, Matthijs and J{\'e}gou, Herv{\'e}},
  title = {Billion-scale similarity search with {GPUs}},
  journal = {IEEE Transactions on Big Data},
  year = {2019},
  volume = {7},
  number = {3},
  pages = {535--547},
  publisher = {IEEE},
}
% LAION-5B dataset (arXiv preprint, 2022).
@article{schuhmann2022laion,
  author = {Schuhmann, Christoph and Beaumont, Romain and Vencu, Richard and Gordon, Cade and Wightman, Ross and Cherti, Mehdi and Coombes, Theo and Katta, Aarush and Mullis, Clayton and Wortsman, Mitchell and others},
  title = {LAION-5B: An open large-scale dataset for training next generation image-text models},
  journal = {arXiv preprint arXiv:2210.08402},
  year = {2022},
}
% GitHub repository: imagehash, pinned to a commit.
@misc{imagehash,
  author = {Buchner, Johannes},
  title = {imagehash (fork)},
  howpublished = {\url{https://github.com/JohannesBuchner/imagehash}},
  year = {2021},
  publisher = {GitHub},
  journal = {GitHub repository},
  commit = {71db1d3324ec8344c507f5a89a529ff0c0156678},
}
% Functional Map of the World dataset (CVPR 2018).
% The venue uses the CVPR string macro defined at the top of the file, which
% expands to the same text as the former literal.
@inproceedings{fmow2018,
  author = {Christie, Gordon and Fendley, Neil and Wilson, James and Mukherjee, Ryan},
  title = {Functional Map of the World},
  booktitle = CVPR,
  year = {2018},
}


% Ranking via Sinkhorn propagation (arXiv preprint, 2011).
@article{adams2011ranking,
  title = {Ranking via Sinkhorn Propagation},
  author = {Adams, Ryan Prescott and Zemel, Richard S},
  journaltitle = {arXiv preprint arXiv:1106.1925},
  date = {2011},
}

% Differentiating through a conic program (arXiv e-print 1904.09043).
@article{agrawal2019differentiating,
  title = {{Differentiating Through a Conic Program}},
  author = {{Agrawal}, Akshay and {Barratt}, Shane and {Boyd}, Stephen and {Busseti}, Enzo and {Moursi}, Walaa M.},
  journaltitle = {arXiv e-prints},
  date = {2019-04},
  pages = {arXiv:1904.09043},
  eid = {arXiv:1904.09043},
  eprint = {1904.09043},
  eprinttype = {arXiv},
  eprintclass = {math.OC},
  keywords = {Mathematics - Optimization and Control},
}

% Second-order cone programming (2003).
@article{alizadeh2003second,
  title = {Second-order cone programming},
  author = {Alizadeh, Farid and Goldfarb, Donald},
  journaltitle = {Mathematical programming},
  volume = {95},
  number = {1},
  pages = {3--51},
  date = {2003},
  publisher = {Springer},
}

% Survey of deep reinforcement learning (arXiv preprint, 2017).
@article{arulkumaran2017brief,
  title = {A brief survey of deep reinforcement learning},
  author = {Arulkumaran, Kai and Deisenroth, Marc Peter and Brundage, Miles and Bharath, Anil Anthony},
  journaltitle = {arXiv preprint arXiv:1708.05866},
  date = {2017},
}

% NOTE: a second definition of the key bahdanau2014neural used to live here.
% The same work is already defined earlier in this file under the same key.
% BibTeX reports a "Repeated entry" error for such duplicates and Biber warns
% about them, so the redundant copy was removed. Citations of
% bahdanau2014neural resolve to the earlier definition.

% DeepCoder (arXiv preprint, 2016).
@article{balog2016deepcoder,
  title = {Deepcoder: Learning to write programs},
  author = {Balog, Matej and Gaunt, Alexander L and Brockschmidt, Marc and Nowozin, Sebastian and Tarlow, Daniel},
  journaltitle = {arXiv preprint arXiv:1611.01989},
  date = {2016},
}

% Model-based priors for model-free RL (arXiv preprint, 2017).
@article{bansal2017mbmf,
  title = {MBMF: Model-Based Priors for Model-Free Reinforcement Learning},
  author = {Bansal, Somil and Calandra, Roberto and Levine, Sergey and Tomlin, Claire},
  journaltitle = {arXiv preprint arXiv:1709.03153},
  date = {2017},
}

% Differentiability of convex optimization solutions (arXiv preprint, 2018).
@article{barratt2018differentiability,
  title = {On the differentiability of the solution to convex optimization problems},
  author = {Barratt, Shane},
  journaltitle = {arXiv preprint arXiv:1804.05098},
  date = {2018},
}

% Cross-validation gradients (arXiv preprint, 2018).
@article{barratt2018optimizing,
  title = {Optimizing for Generalization in Machine Learning with Cross-Validation Gradients},
  author = {Barratt, Shane and Sharma, Rishi},
  journaltitle = {arXiv preprint arXiv:1805.07072},
  date = {2018},
}

% Least squares auto-tuning (arXiv preprint, 2019).
@article{barratt2019least,
  title = {Least Squares Auto-Tuning},
  author = {Barratt, Shane and Boyd, Stephen},
  journaltitle = {arXiv preprint arXiv:1904.05460},
  date = {2019},
}

% Two-point step size gradient methods (1988).
@article{barzilai1988two,
  title = {Two-point step size gradient methods},
  author = {Barzilai, Jonathan and Borwein, Jonathan M},
  journaltitle = {IMA Journal of Numerical Analysis},
  volume = {8},
  number = {1},
  pages = {141--148},
  date = {1988},
  publisher = {Oxford University Press},
}

% Relational inductive biases and graph networks (arXiv preprint, 2018).
@article{battaglia2018relational,
  title = {Relational inductive biases, deep learning, and graph networks},
  author = {Battaglia, Peter W and Hamrick, Jessica B and Bapst, Victor and Sanchez-Gonzalez, Alvaro and Zambaldi, Vinicius and Malinowski, Mateusz and Tacchetti, Andrea and Raposo, David and Santoro, Adam and Faulkner, Ryan and others},
  journaltitle = {arXiv preprint arXiv:1806.01261},
  date = {2018},
}

% Convex analysis and monotone operator theory monograph (Springer, 2017).
% The work is a book, so it is typed as book rather than article (the article
% form had no journal). The edition is moved out of the title into the edition
% field, written as an integer to match the biblatex-style date field used
% here.
@book{bauschke2017convex,
  author    = {Bauschke, Heinz H and Combettes, Patrick L},
  title     = {Convex Analysis and Monotone Operator Theory in {Hilbert} Spaces},
  edition   = {2},
  publisher = {Springer},
  date      = {2017}
}

@article{beck2009fast,
  author       = {Beck, Amir and Teboulle, Marc},
  title        = {A fast iterative shrinkage-thresholding algorithm for linear inverse problems},
  journaltitle = {SIAM Journal on Imaging Sciences},
  volume       = {2},
  number       = {1},
  pages        = {183--202},
  publisher    = {SIAM},
  date         = {2009}
}

@online{belanger2015structured,
  author     = {Belanger, David and McCallum, Andrew},
  title      = {Structured Prediction Energy Networks},
  date       = {2015},
  eprint     = {1511.06350},
  eprinttype = {arXiv}
}

@inproceedings{bengio-lecun-henderson-94,
  author    = {Bengio, Yoshua and LeCun, Yann and Henderson, Donnie},
  title     = {Globally trained handwritten word recognizer using spatial representation, convolutional neural networks, and hidden {Markov} models},
  booktitle = {Advances in Neural Information Processing Systems},
  pages     = {937--944},
  publisher = {Morgan Kaufmann},
  date      = {1994}
}

@book{bengio2015deep,
  author    = {Bengio, Yoshua and Goodfellow, Ian J and Courville, Aaron},
  title     = {Deep Learning},
  publisher = {MIT Press},
  date      = {2015},
  note      = {Book in preparation},
  url       = {http://www.iro.umontreal.ca/~bengioy/dlbook}
}

@online{bertinetto2018meta,
  author     = {Bertinetto, Luca and Henriques, Jo{\~{a}}o F and Torr, Philip H. S. and Vedaldi, Andrea},
  title      = {Meta-learning with differentiable closed-form solvers},
  date       = {2018},
  eprint     = {1805.08136},
  eprinttype = {arXiv}
}

@article{bertsekas1982projected,
  author       = {Bertsekas, Dimitri P},
  title        = {Projected {Newton} methods for optimization problems with simple constraints},
  journaltitle = {SIAM Journal on Control and Optimization},
  volume       = {20},
  number       = {2},
  pages        = {221--246},
  publisher    = {SIAM},
  date         = {1982}
}

@misc{bibi2018deep,
  author = {Bibi, Adel and Ghanem, Bernard and Koltun, Vladlen and Ranftl, Ren{\'{e}}},
  title  = {Deep Layers as Stochastic Solvers},
  date   = {2018}
}

@article{birgin2000nonmonotone,
  author       = {Birgin, Ernesto G and Mart{\'{\i{}}}nez, Jos{\'{e}} Mario and Raydan, Marcos},
  title        = {Nonmonotone spectral projected gradient methods on convex sets},
  journaltitle = {SIAM Journal on Optimization},
  volume       = {10},
  number       = {4},
  pages        = {1196--1211},
  publisher    = {SIAM},
  date         = {2000},
}

@article{boyd2011distributed,
  author       = {Boyd, Stephen and Parikh, Neal and Chu, Eric and Peleato, Borja and Eckstein, Jonathan},
  title        = {Distributed optimization and statistical learning via the alternating direction method of multipliers},
  journaltitle = {Foundations and Trends{\textregistered} in Machine Learning},
  volume       = {3},
  number       = {1},
  pages        = {1--122},
  publisher    = {Now Publishers Inc.},
  date         = {2011}
}

@article{brakel2013training,
  author       = {Brakel, Phil{\'{e}}mon and Stroobandt, Dirk and Schrauwen, Benjamin},
  title        = {Training energy-based models for time-series imputation},
  journaltitle = {Journal of Machine Learning Research},
  volume       = {14},
  number       = {1},
  pages        = {2771--2797},
  date         = {2013}
}

@online{brockman2016openai,
  author     = {Brockman, Greg and Cheung, Vicki and Pettersson, Ludwig and Schneider, Jonas and Schulman, John and Tang, Jie and Zaremba, Wojciech},
  title      = {{OpenAI} {Gym}},
  date       = {2016},
  eprint     = {1606.01540},
  eprinttype = {arXiv}
}

@article{bronstein2017geometric,
  author       = {Bronstein, Michael M and Bruna, Joan and LeCun, Yann and Szlam, Arthur and Vandergheynst, Pierre},
  title        = {Geometric deep learning: going beyond {Euclidean} data},
  journaltitle = {IEEE Signal Processing Magazine},
  volume       = {34},
  number       = {4},
  pages        = {18--42},
  publisher    = {IEEE},
  date         = {2017}
}

@online{brukhim2018predict,
  author     = {Brukhim, Nataly and Globerson, Amir},
  title      = {Predict and Constrain: Modeling Cardinality in Deep Structured Prediction},
  date       = {2018},
  eprint     = {1802.04721},
  eprinttype = {arXiv}
}

@online{bruna2013spectral,
  author     = {Bruna, Joan and Zaremba, Wojciech and Szlam, Arthur and LeCun, Yann},
  title      = {Spectral networks and locally connected networks on graphs},
  date       = {2013},
  eprint     = {1312.6203},
  eprinttype = {arXiv}
}

@online{busseti2018solution,
  author     = {Busseti, Enzo and Moursi, Walaa M. and Boyd, Stephen},
  title      = {Solution Refinement at Regular Points of Conic Problems},
  date       = {2018},
  eprint     = {1811.02157},
  eprinttype = {arXiv}
}

@online{chebotar2017combining,
  author     = {Chebotar, Yevgen and Hausman, Karol and Zhang, Marvin and Sukhatme, Gaurav and Schaal, Stefan and Levine, Sergey},
  title      = {Combining Model-Based and Model-Free Updates for Trajectory-Centric Reinforcement Learning},
  date       = {2017},
  eprint     = {1703.03078},
  eprinttype = {arXiv}
}

@online{chen2018optimal,
  author     = {Chen, Yize and Shi, Yuanyuan and Zhang, Baosen},
  title      = {Optimal Control Via Neural Networks: A Convex Approach},
  date       = {2018},
  eprint     = {1805.11835},
  eprinttype = {arXiv}
}

@article{clarke1975generalized,
  author       = {Clarke, Frank H},
  title        = {Generalized gradients and applications},
  journaltitle = {Transactions of the American Mathematical Society},
  volume       = {205},
  pages        = {247--262},
  date         = {1975},
}

@online{dalal2018safe,
  author     = {Dalal, Gal and Dvijotham, Krishnamurthy and Vecerik, Matej and Hester, Todd and Paduraru, Cosmin and Tassa, Yuval},
  title      = {Safe exploration in continuous action spaces},
  date       = {2018},
  eprint     = {1801.08757},
  eprinttype = {arXiv}
}

@online{dean2017sample,
  author     = {Dean, Sarah and Mania, Horia and Matni, Nikolai and Recht, Benjamin and Tu, Stephen},
  title      = {On the Sample Complexity of the Linear Quadratic Regulator},
  date       = {2017},
  eprint     = {1710.01688},
  eprinttype = {arXiv}
}

@article{diamond2016cvxpy,
  author       = {Diamond, Steven and Boyd, Stephen},
  title        = {{CVXPY}: A {Python}-embedded modeling language for convex optimization},
  journaltitle = {Journal of Machine Learning Research},
  volume       = {17},
  number       = {1},
  pages        = {2909--2913},
  publisher    = {JMLR.org},
  date         = {2016}
}

@online{diamond2017unrolled,
  author     = {Diamond, Steven and Sitzmann, Vincent and Heide, Felix and Wetzstein, Gordon},
  title      = {Unrolled optimization with deep priors},
  date       = {2017},
  eprint     = {1705.08041},
  eprinttype = {arXiv}
}

@book{dini1877analisi,
  author = {Dini, Ulisse},
  title  = {Analisi infinitesimale},
  date   = {1877},
  note   = {Lezioni dettate nella R. Universit{\`{a}} di Pisa (1877/78)}
}

@book{dontchev2009implicit,
  author    = {Dontchev, Asen L and Rockafellar, R Tyrrell},
  title     = {Implicit Functions and Solution Mappings},
  series    = {Springer Monographs in Mathematics},
  publisher = {Springer},
  date      = {2009}
}

@article{doya2000reinforcement,
  author       = {Doya, Kenji},
  title        = {Reinforcement learning in continuous time and space},
  journaltitle = {Neural Computation},
  volume       = {12},
  number       = {1},
  pages        = {219--245},
  publisher    = {MIT Press},
  date         = {2000}
}

@article{duchi2011adaptive,
  author       = {Duchi, John and Hazan, Elad and Singer, Yoram},
  title        = {Adaptive subgradient methods for online learning and stochastic optimization},
  journaltitle = {Journal of Machine Learning Research},
  volume       = {12},
  pages        = {2121--2159},
  publisher    = {JMLR.org},
  date         = {2011}
}

@online{farquhar2017treeqn,
  author     = {Farquhar, Gregory and Rockt{\"{a}}schel, Tim and Igl, Maximilian and Whiteson, Shimon},
  title      = {{TreeQN} and {ATreeC}: Differentiable Tree Planning for Deep Reinforcement Learning},
  date       = {2017},
  eprint     = {1710.11417},
  eprinttype = {arXiv}
}

@article{fiacco1990sensitivity,
  author       = {Fiacco, Anthony V and Ishizuka, Yo},
  title        = {Sensitivity and stability analysis for nonlinear programming},
  journaltitle = {Annals of Operations Research},
  volume       = {27},
  number       = {1},
  pages        = {215--235},
  publisher    = {Springer},
  date         = {1990},
}

@online{foerster2017learning,
  author     = {Foerster, Jakob N and Chen, Richard Y and Al-Shedivat, Maruan and Whiteson, Shimon and Abbeel, Pieter and Mordatch, Igor},
  title      = {Learning with Opponent-Learning Awareness},
  date       = {2017},
  eprint     = {1709.04326},
  eprinttype = {arXiv}
}

@book{forsyth2003modern,
  author    = {Forsyth, David A and Ponce, Jean},
  title     = {Computer Vision: A Modern Approach},
  publisher = {Prentice-Hall},
  date      = {2003},
  pages     = {88--101}
}

@article{garcia1989model,
  author       = {Garcia, Carlos E and Prett, David M and Morari, Manfred},
  title        = {Model predictive control: theory and practice---a survey},
  journaltitle = {Automatica},
  volume       = {25},
  number       = {3},
  pages        = {335--348},
  publisher    = {Elsevier},
  date         = {1989},
}

@online{gatys2015neural,
  author     = {Gatys, Leon A and Ecker, Alexander S and Bethge, Matthias},
  title      = {A neural algorithm of artistic style},
  date       = {2015},
  eprint     = {1508.06576},
  eprinttype = {arXiv}
}

@online{gilmer2017neural,
  author     = {Gilmer, Justin and Schoenholz, Samuel S and Riley, Patrick F and Vinyals, Oriol and Dahl, George E},
  title      = {Neural message passing for quantum chemistry},
  date       = {2017},
  eprint     = {1704.01212},
  eprinttype = {arXiv}
}

@article{gonzalez2011robust,
  author       = {Gonz{\'{a}}lez, Ram{\'{o}}n and Fiacchini, Mirko and Guzm{\'{a}}n, Jos{\'{e}} Luis and {\'{A}}lamo, Teodoro and Rodr{\'{\i{}}}guez, Francisco},
  title        = {Robust tube-based predictive control for mobile robots in off-road conditions},
  journaltitle = {Robotics and Autonomous Systems},
  volume       = {59},
  number       = {10},
  pages        = {711--726},
  publisher    = {Elsevier},
  date         = {2011},
}

@online{gould2016differentiating,
  author     = {Gould, Stephen and Fernando, Basura and Cherian, Anoop and Anderson, Peter and Santa Cruz, Rodrigo and Guo, Edison},
  title      = {On Differentiating Parameterized Argmin and Argmax Problems with Application to Bi-level Optimization},
  date       = {2016},
  eprint     = {1607.05447},
  eprinttype = {arXiv}
}

@online{grathwohl2018ffjord,
  author     = {Grathwohl, Will and Chen, Ricky T. Q. and Bettencourt, Jesse and Sutskever, Ilya and Duvenaud, David},
  title      = {{FFJORD}: Free-form continuous dynamics for scalable reversible generative models},
  date       = {2018},
  eprint     = {1810.01367},
  eprinttype = {arXiv}
}

@online{graves2014neural,
  author     = {Graves, Alex and Wayne, Greg and Danihelka, Ivo},
  title      = {Neural {Turing} machines},
  date       = {2014},
  eprint     = {1410.5401},
  eprinttype = {arXiv}
}

@article{graves2016hybrid,
  author       = {Graves, Alex and Wayne, Greg and Reynolds, Malcolm and Harley, Tim and Danihelka, Ivo and Grabska-Barwi{\'{n}}ska, Agnieszka and G{\'{o}}mez Colmenarejo, Sergio and Grefenstette, Edward and Ramalho, Tiago and Agapiou, John and others},
  title        = {Hybrid computing using a neural network with dynamic external memory},
  journaltitle = {Nature},
  volume       = {538},
  number       = {7626},
  pages        = {471--476},
  publisher    = {Nature Publishing Group},
  date         = {2016}
}

@online{gu2017qprop,
  author     = {Gu, Shixiang and Lillicrap, Timothy and Ghahramani, Zoubin and Turner, Richard E and Levine, Sergey},
  title      = {{Q-Prop}: Sample-efficient policy gradient with an off-policy critic},
  date       = {2016},
  eprint     = {1611.02247},
  eprinttype = {arXiv}
}

@online{gulcehre2018hyperbolic,
  author     = {Gulcehre, Caglar and Denil, Misha and Malinowski, Mateusz and Razavi, Ali and Pascanu, Razvan and Hermann, Karl Moritz and Battaglia, Peter and Bapst, Victor and Raposo, David and Santoro, Adam and others},
  title      = {Hyperbolic attention networks},
  date       = {2018},
  eprint     = {1805.09786},
  eprinttype = {arXiv}
}

@article{gupta2014training,
  author       = {Gupta, Maya R and Bengio, Samy and Weston, Jason},
  title        = {Training highly multiclass classifiers},
  journaltitle = {Journal of Machine Learning Research},
  volume       = {15},
  number       = {1},
  pages        = {1461--1492},
  publisher    = {JMLR.org},
  date         = {2014}
}

@online{he2015deep,
  author     = {He, Kaiming and Zhang, Xiangyu and Ren, Shaoqing and Sun, Jian},
  title      = {Deep Residual Learning for Image Recognition},
  date       = {2015},
  eprint     = {1512.03385},
  eprinttype = {arXiv}
}

@online{herzig2018mapping,
  author     = {Herzig, Roei and Raboh, Moshiko and Chechik, Gal and Berant, Jonathan and Globerson, Amir},
  title      = {Mapping Images to Scene Graphs with Permutation-Invariant Structured Prediction},
  date       = {2018},
  eprint     = {1802.05451},
  eprinttype = {arXiv}
}

@online{hill2015goldilocks,
  author     = {Hill, Felix and Bordes, Antoine and Chopra, Sumit and Weston, Jason},
  title      = {The {Goldilocks} principle: Reading children's books with explicit memory representations},
  date       = {2015},
  eprint     = {1511.02301},
  eprinttype = {arXiv}
}

@inproceedings{hinton2018matrix,
  author    = {Hinton, Geoffrey E and Sabour, Sara and Frosst, Nicholas},
  title     = {Matrix capsules with {EM} routing},
  booktitle = ICLR,
  date      = {2018}
}

@online{huang2016densely,
  author     = {Huang, Gao and Liu, Zhuang and Weinberger, Kilian Q},
  title      = {Densely Connected Convolutional Networks},
  date       = {2016},
  eprint     = {1608.06993},
  eprinttype = {arXiv}
}

@article{hunter2007matplotlib,
  author       = {Hunter, John D},
  title        = {{Matplotlib}: A {2D} graphics environment},
  journaltitle = {Computing in Science \& Engineering},
  volume       = {9},
  number       = {3},
  pages        = {90--95},
  publisher    = {IEEE Computer Society},
  date         = {2007}
}

@misc{ingraham2018learning,
  author = {Ingraham, John and Riesselman, Adam and Sander, Chris and Marks, Debora},
  title  = {Learning Protein Structure with a Differentiable Simulator},
  date   = {2018}
}

@misc{jones2014scipy,
  author = {Jones, Eric and Oliphant, Travis and Peterson, Pearu},
  title  = {{SciPy}: Open source scientific tools for {Python}},
  date   = {2014}
}

@online{jonschkowski2018differentiable,
  author     = {Jonschkowski, Rico and Rastogi, Divyam and Brock, Oliver},
  title      = {Differentiable particle filters: End-to-end learning with algorithmic priors},
  date       = {2018},
  eprint     = {1805.11122},
  eprinttype = {arXiv}
}

@online{karkus2018integrating,
  author     = {Karkus, Peter and Hsu, David and Lee, Wee Sun},
  title      = {Integrating Algorithmic Planning and Deep Learning for Partially Observable Navigation},
  date       = {2018},
  eprint     = {1807.06696},
  eprinttype = {arXiv}
}

@inproceedings{katakis2008multilabel,
  author    = {Katakis, Ioannis and Tsoumakas, Grigorios and Vlahavas, Ioannis},
  title     = {Multilabel text classification for automated tag suggestion},
  booktitle = {ECML PKDD Discovery Challenge},
  volume    = {75},
  date      = {2008}
}

@article{kennedy1988neural,
  author       = {Kennedy, Michael Peter and Chua, Leon O},
  title        = {Neural networks for nonlinear programming},
  journaltitle = {IEEE Transactions on Circuits and Systems},
  volume       = {35},
  number       = {5},
  pages        = {554--562},
  publisher    = {IEEE},
  date         = {1988},
}

@online{kingma2014adam,
  author     = {Kingma, Diederik and Ba, Jimmy},
  title      = {{Adam}: A method for stochastic optimization},
  date       = {2014},
  eprint     = {1412.6980},
  eprinttype = {arXiv}
}

@online{kipf2016semi,
  author     = {Kipf, Thomas N and Welling, Max},
  title      = {Semi-supervised classification with graph convolutional networks},
  date       = {2016},
  eprint     = {1609.02907},
  eprinttype = {arXiv}
}

@online{koh2017understanding,
  author     = {Koh, Pang Wei and Liang, Percy},
  title      = {Understanding black-box predictions via influence functions},
  date       = {2017},
  eprint     = {1703.04730},
  eprinttype = {arXiv}
}

@article{krishna2017visual,
  author       = {Krishna, Ranjay and Zhu, Yuke and Groth, Oliver and Johnson, Justin and Hata, Kenji and Kravitz, Joshua and Chen, Stephanie and Kalantidis, Yannis and Li, Li-Jia and Shamma, David A and others},
  title        = {Visual genome: Connecting language and vision using crowdsourced dense image annotations},
  journaltitle = {International Journal of Computer Vision},
  volume       = {123},
  number       = {1},
  pages        = {32--73},
  publisher    = {Springer},
  date         = {2017},
}

@article{kunisch2013bilevel,
  author       = {Kunisch, Karl and Pock, Thomas},
  title        = {A bilevel optimization approach for parameter learning in variational models},
  journaltitle = {SIAM Journal on Imaging Sciences},
  volume       = {6},
  number       = {2},
  pages        = {938--983},
  publisher    = {SIAM},
  date         = {2013},
}

@inproceedings{lafferty2001conditional,
  author    = {Lafferty, John and McCallum, Andrew and Pereira, Fernando C. N.},
  title     = {Conditional random fields: Probabilistic models for segmenting and labeling sequence data},
  booktitle = ICML,
  pages     = {282--289},
  date      = {2001}
}

@article{lecun1998gradient,
  author       = {LeCun, Yann and Bottou, L{\'{e}}on and Bengio, Yoshua and Haffner, Patrick},
  title        = {Gradient-based learning applied to document recognition},
  journaltitle = {Proceedings of the IEEE},
  volume       = {86},
  number       = {11},
  pages        = {2278--2324},
  publisher    = {IEEE},
  date         = {1998},
}

@incollection{lecun2006tutorial,
  author    = {LeCun, Yann and Chopra, Sumit and Hadsell, Raia and Ranzato, Marc'Aurelio and Huang, Fu Jie},
  title     = {A tutorial on energy-based learning},
  booktitle = {Predicting Structured Data},
  publisher = {MIT Press},
  date      = {2006}
}

@online{lee2019meta,
  author     = {Lee, Kwonjoon and Maji, Subhransu and Ravichandran, Avinash and Soatto, Stefano},
  title      = {Meta-Learning with Differentiable Convex Optimization},
  date       = {2019},
  eprint     = {1904.03758},
  eprinttype = {arXiv}
}

@article{levine2016end,
  author       = {Levine, Sergey and Finn, Chelsea and Darrell, Trevor and Abbeel, Pieter},
  title        = {End-to-end training of deep visuomotor policies},
  journaltitle = {Journal of Machine Learning Research},
  volume       = {17},
  number       = {1},
  pages        = {1334--1373},
  publisher    = {JMLR.org},
  date         = {2016}
}

@misc{li2018smoothing,
  author = {Li, Xiang and Vilnis, Luke and Zhang, Dongxu and Boratko, Michael and McCallum, Andrew},
  title  = {Smoothing the Geometry of Probabilistic Box Embeddings},
  date   = {2018}
}

@online{lillicrap2015continuous,
  author     = {Lillicrap, Timothy P and Hunt, Jonathan J and Pritzel, Alexander and Heess, Nicolas and Erez, Tom and Tassa, Yuval and Silver, David and Wierstra, Daan},
  title      = {Continuous control with deep reinforcement learning},
  date       = {2015},
  eprint     = {1509.02971},
  eprinttype = {arXiv}
}

@article{lillo1993solving,
  author       = {Lillo, Walter E and Loh, Mei Heng and Hui, Stefen and Zak, Stanislaw H},
  title        = {On solving constrained optimization problems with neural networks: A penalty method approach},
  journaltitle = {IEEE Transactions on Neural Networks},
  volume       = {4},
  number       = {6},
  pages        = {931--940},
  publisher    = {IEEE},
  date         = {1993}
}

@online{ling2018game,
  author     = {Ling, Chun Kai and Fang, Fei and Kolter, J. Zico},
  title      = {What game are we playing? {End-to-end} learning in normal and extensive form games},
  date       = {2018},
  eprint     = {1805.02777},
  eprinttype = {arXiv}
}

@online{liu2015transductive,
  author     = {Liu, Li-Ping and Dietterich, Thomas G and Li, Nan and Zhou, Zhi-Hua},
  title      = {Transductive optimization of top k precision},
  date       = {2015},
  eprint     = {1510.05976},
  eprinttype = {arXiv}
}

@article{lobo1998applications,
  author       = {Lobo, Miguel Sousa and Vandenberghe, Lieven and Boyd, Stephen and Lebret, Herv{\'{e}}},
  title        = {Applications of second-order cone programming},
  journaltitle = {Linear Algebra and its Applications},
  volume       = {284},
  number       = {1--3},
  pages        = {193--228},
  publisher    = {Elsevier},
  date         = {1998}
}

@article{lotstedt1984numerical,
  author       = {L{\"{o}}tstedt, Per},
  title        = {Numerical simulation of time-dependent contact and friction problems in rigid body mechanics},
  journaltitle = {SIAM Journal on Scientific and Statistical Computing},
  volume       = {5},
  number       = {2},
  pages        = {370--393},
  publisher    = {SIAM},
  date         = {1984}
}

@article{madjarov2012extensive,
  author       = {Madjarov, Gjorgji and Kocev, Dragi and Gjorgjevikj, Dejan and D{\v{z}}eroski, Sa{\v{s}}o},
  title        = {An extensive experimental comparison of methods for multi-label learning},
  journaltitle = {Pattern Recognition},
  volume       = {45},
  number       = {9},
  pages        = {3084--3104},
  publisher    = {Elsevier},
  date         = {2012},
}

@article{magnani2009convex,
  author       = {Magnani, Alessandro and Boyd, Stephen P},
  title        = {Convex piecewise-linear fitting},
  journaltitle = {Optimization and Engineering},
  volume       = {10},
  number       = {1},
  pages        = {1--17},
  publisher    = {Springer},
  date         = {2009},
}

@book{magnus1988matrix,
  author    = {Magnus, Jan R. and Neudecker, Heinz},
  title     = {Matrix Differential Calculus with Applications in Statistics and Econometrics},
  publisher = {Wiley},
  location  = {New York},
  date      = {1988}
}

@article{mairal2012task,
  author       = {Mairal, Julien and Bach, Francis and Ponce, Jean},
  title        = {Task-driven dictionary learning},
  journaltitle = {IEEE Transactions on Pattern Analysis and Machine Intelligence},
  volume       = {34},
  number       = {4},
  pages        = {791--804},
  publisher    = {IEEE},
  date         = {2012},
}

@article{mattingley2012cvxgen,
  author       = {Mattingley, Jacob and Boyd, Stephen},
  title        = {{CVXGEN}: A code generator for embedded convex optimization},
  journaltitle = {Optimization and Engineering},
  volume       = {13},
  number       = {1},
  pages        = {1--27},
  publisher    = {Springer},
  date         = {2012}
}

@online{mena2018learning,
  author     = {Mena, Gonzalo and Belanger, David and Linderman, Scott and Snoek, Jasper},
  title      = {Learning Latent Permutations with {Gumbel-Sinkhorn} Networks},
  date       = {2018},
  eprint     = {1802.08665},
  eprinttype = {arXiv}
}

@online{mensch2018differentiable,
  author     = {Mensch, Arthur and Blondel, Mathieu},
  title      = {Differentiable dynamic programming for structured prediction and attention},
  date       = {2018},
  eprint     = {1802.03676},
  eprinttype = {arXiv}
}

@online{metz2016unrolled,
  author     = {Metz, Luke and Poole, Ben and Pfau, David and Sohl-Dickstein, Jascha},
  title      = {Unrolled Generative Adversarial Networks},
  date       = {2016},
  eprint     = {1611.02163},
  eprinttype = {arXiv}
}

@online{mnih2013playing,
  author     = {Mnih, Volodymyr and Kavukcuoglu, Koray and Silver, David and Graves, Alex and Antonoglou, Ioannis and Wierstra, Daan and Riedmiller, Martin},
  title      = {Playing {Atari} with deep reinforcement learning},
  date       = {2013},
  eprint     = {1312.5602},
  eprinttype = {arXiv}
}

@article{mnih2015human,
  author       = {Mnih, Volodymyr and Kavukcuoglu, Koray and Silver, David and Rusu, Andrei A and Veness, Joel and Bellemare, Marc G and Graves, Alex and Riedmiller, Martin and Fidjeland, Andreas K and Ostrovski, Georg and others},
  title        = {Human-level control through deep reinforcement learning},
  journaltitle = {Nature},
  volume       = {518},
  number       = {7540},
  pages        = {529--533},
  publisher    = {Nature Publishing Group},
  date         = {2015},
}

@article{morari1999model,
  author       = {Morari, Manfred and Lee, Jay H},
  title        = {Model predictive control: past, present and future},
  journaltitle = {Computers \& Chemical Engineering},
  volume       = {23},
  number       = {4},
  pages        = {667--682},
  publisher    = {Elsevier},
  date         = {1999},
}

@article{moreau1961decomposition,
  author       = {Moreau, Jean Jacques},
  title        = {D{\'{e}}composition orthogonale d'un espace hilbertien selon deux c{\^{o}}nes mutuellement polaires},
  journaltitle = {Comptes rendus hebdomadaires des s{\'{e}}ances de l'Acad{\'{e}}mie des sciences},
  volume       = {255},
  pages        = {238--240},
  date         = {1961},
}

@online{neelakantan2015neural,
  author     = {Neelakantan, Arvind and Le, Quoc V and Sutskever, Ilya},
  title      = {Neural programmer: Inducing latent programs with gradient descent},
  date       = {2015},
  eprint     = {1511.04834},
  eprinttype = {arXiv}
}

@online{niculae2018sparsemap,
  author     = {Niculae, Vlad and Martins, Andr{\'{e}} F. T. and Blondel, Mathieu and Cardie, Claire},
  title      = {{SparseMAP}: Differentiable sparse structured inference},
  date       = {2018},
  eprint     = {1802.04223},
  eprinttype = {arXiv}
}

@article{odonoghue2016conic,
  author       = {O'Donoghue, Brendan and Chu, Eric and Parikh, Neal and Boyd, Stephen},
  title        = {Conic optimization via operator splitting and homogeneous self-dual embedding},
  journaltitle = {Journal of Optimization Theory and Applications},
  volume       = {169},
  number       = {3},
  pages        = {1042--1068},
  publisher    = {Springer},
  date         = {2016},
}

@inproceedings{oh2016minecraft,
  author    = {Oh, Junhyuk and Chockalingam, Valliappa and Singh, Satinder and Lee, Honglak},
  title     = {Control of Memory, Active Perception, and Action in {Minecraft}},
  booktitle = {Proceedings of the 33rd International Conference on Machine Learning (ICML)},
  date      = {2016}
}

@online{okada2017path,
  author     = {Okada, Masashi and Rigazio, Luca and Aoshima, Takenobu},
  title      = {Path Integral Networks: End-to-End Differentiable Optimal Control},
  date       = {2017},
  eprint     = {1706.09597},
  eprinttype = {arXiv}
}

@article{oliphant2007python,
  author       = {Oliphant, Travis E},
  title        = {{Python} for scientific computing},
  journaltitle = {Computing in Science \& Engineering},
  volume       = {9},
  number       = {3},
  pages        = {10--20},
  publisher    = {IEEE},
  date         = {2007}
}

@article{paige1982lsqr,
  author       = {Paige, Christopher C and Saunders, Michael A},
  title        = {{LSQR}: An algorithm for sparse linear equations and sparse least squares},
  journaltitle = {ACM Transactions on Mathematical Software (TOMS)},
  volume       = {8},
  number       = {1},
  pages        = {43--71},
  publisher    = {ACM},
  date         = {1982}
}

@online{parisotto2016neuro,
  author     = {Parisotto, Emilio and Mohamed, Abdel-rahman and Singh, Rishabh and Li, Lihong and Zhou, Dengyong and Kohli, Pushmeet},
  title      = {Neuro-symbolic program synthesis},
  date       = {2016},
  eprint     = {1611.01855},
  eprinttype = {arXiv}
}

@online{parisotto2017neural,
  author     = {Parisotto, Emilio and Salakhutdinov, Ruslan},
  title      = {Neural map: Structured memory for deep reinforcement learning},
  date       = {2017},
  eprint     = {1702.08360},
  eprinttype = {arXiv}
}

@online{pascanu2017learning,
  author     = {Pascanu, Razvan and Li, Yujia and Vinyals, Oriol and Heess, Nicolas and Buesing, Lars and Racani{\`{e}}re, Sebastien and Reichert, David and Weber, Th{\'{e}}ophane and Wierstra, Daan and Battaglia, Peter},
  title      = {Learning model-based planning from scratch},
  date       = {2017},
  eprint     = {1707.06170},
  eprinttype = {arXiv}
}

@inproceedings{paszke2017automatic,
  author    = {Paszke, Adam and Gross, Sam and Chintala, Soumith and Chanan, Gregory and Yang, Edward and DeVito, Zachary and Lin, Zeming and Desmaison, Alban and Antiga, Luca and Lerer, Adam},
  title     = {Automatic differentiation in {PyTorch}},
  booktitle = {NIPS Autodiff Workshop},
  date      = {2017}
}

@online{pathak2018zero,
  author     = {Pathak, Deepak and Mahmoudieh, Parsa and Luo, Guanghao and Agrawal, Pulkit and Chen, Dian and Shentu, Yide and Shelhamer, Evan and Malik, Jitendra and Efros, Alexei A and Darrell, Trevor},
  title      = {Zero-shot visual imitation},
  date       = {2018},
  eprint     = {1804.08606},
  eprinttype = {arXiv}
}

@article{pedregosa2011scikit,
  author       = {Pedregosa, Fabian and Varoquaux, Ga{\"{e}}l and Gramfort, Alexandre and Michel, Vincent and Thirion, Bertrand and Grisel, Olivier and Blondel, Mathieu and Prettenhofer, Peter and Weiss, Ron and Dubourg, Vincent and others},
  title        = {{Scikit-learn}: Machine learning in {Python}},
  journaltitle = {Journal of Machine Learning Research},
  volume       = {12},
  pages        = {2825--2830},
  publisher    = {JMLR.org},
  date         = {2011}
}

@article{peng1993efficient,
  author       = {Peng, Jing and Williams, Ronald J},
  title        = {Efficient learning and planning within the {Dyna} framework},
  journaltitle = {Adaptive Behavior},
  volume       = {1},
  number       = {4},
  pages        = {437--454},
  publisher    = {Sage Publications},
  location     = {Thousand Oaks, CA},
  date         = {1993}
}

@online{pereira2018pinets,
  author     = {Pereira, Marcus and Fan, David D. and An, Gabriel Nakajima and Theodorou, Evangelos},
  title      = {{MPC}-Inspired Neural Network Policies for Sequential Decision Making},
  date       = {2018},
  eprint     = {1802.05803},
  eprinttype = {arXiv}
}

@article{polyak1964some,
  author       = {Polyak, Boris T},
  title        = {Some methods of speeding up the convergence of iteration methods},
  journaltitle = {USSR Computational Mathematics and Mathematical Physics},
  volume       = {4},
  number       = {5},
  pages        = {1--17},
  publisher    = {Elsevier},
  date         = {1964},
}

@online{pong2018temporal,
  author     = {Pong, Vitchyr and Gu, Shixiang and Dalal, Murtaza and Levine, Sergey},
  title      = {Temporal Difference Models: Model-Free Deep {RL} for Model-Based Control},
  date       = {2018},
  eprint     = {1802.09081},
  eprinttype = {arXiv}
}

@online{rakotomamonjy2012sparse,
  author     = {Rakotomamonjy, Alain},
  title      = {Sparse support vector infinite push},
  date       = {2012},
  eprint     = {1206.6432},
  eprinttype = {arXiv}
}

@online{raposo2017discovering,
  author     = {Raposo, David and Santoro, Adam and Barrett, David and Pascanu, Razvan and Lillicrap, Timothy and Battaglia, Peter},
  title      = {Discovering objects and their relations from entangled scene representations},
  date       = {2017},
  eprint     = {1702.05068},
  eprinttype = {arXiv}
}

@inproceedings{ravi2016optimization,
  author    = {Ravi, Sachin and Larochelle, Hugo},
  booktitle = {International Conference on Learning Representations},
  date      = {2017},
  title     = {Optimization as a model for few-shot learning}
}

@online{reed2015neural,
  author     = {Reed, Scott and De Freitas, Nando},
  date       = {2015},
  eprint     = {1511.06279},
  eprinttype = {arXiv},
  title      = {Neural programmer-interpreters}
}

@book{rockafellar1970convex,
  author    = {Rockafellar, R Tyrrell},
  publisher = {Princeton University Press},
  location  = {Princeton, NJ},
  date      = {1970},
  title     = {Convex Analysis}
}

@article{rudin1992nonlinear,
  author       = {Rudin, Leonid I and Osher, Stanley and Fatemi, Emad},
  publisher    = {Elsevier},
  date         = {1992},
  journaltitle = {Physica D: Nonlinear Phenomena},
  number       = {1--4},
  pages        = {259--268},
  title        = {Nonlinear total variation based noise removal algorithms},
  volume       = {60}
}

@article{rudin2009p,
  author       = {Rudin, Cynthia},
  date         = {2009-10},
  journaltitle = {Journal of Machine Learning Research},
  pages        = {2233--2271},
  title        = {The p-norm push: A simple convex ranking algorithm that concentrates at the top of the list},
  volume       = {10}
}

% The key keeps its historical 1988 suffix so existing \cite commands still
% resolve; the fields describe the original 1986 publication in Nature.
@article{rumelhart1988learning,
  author       = {Rumelhart, David E and Hinton, Geoffrey E and Williams, Ronald J},
  date         = {1986},
  journaltitle = {Nature},
  number       = {6088},
  pages        = {533--536},
  title        = {Learning representations by back-propagating errors},
  volume       = {323}
}

@article{santa2018visual,
  author       = {Santa Cruz, Rodrigo and Fernando, Basura and Cherian, Anoop and Gould, Stephen},
  publisher    = {IEEE},
  date         = {2018},
  journaltitle = {{IEEE} Transactions on Pattern Analysis and Machine Intelligence},
  title        = {Visual permutation learning}
}

@online{santoro2017simple,
  author     = {Santoro, Adam and Raposo, David and Barrett, David GT and Malinowski, Mateusz and Pascanu, Razvan and Battaglia, Peter and Lillicrap, Timothy},
  date       = {2017},
  eprint     = {1706.01427},
  eprinttype = {arXiv},
  title      = {A simple neural network module for relational reasoning}
}

@article{satyanarayanan2015edge,
  abstract     = {High-data-rate sensors, such as video cameras, are becoming ubiquitous in the Internet of Things. This article describes GigaSight, an Internet-scale repository of crowd-sourced video content, with strong enforcement of privacy preferences and access controls. The GigaSight architecture is a federated system of VM-based cloudlets that perform video analytics at the edge of the Internet, thus reducing the demand for ingress bandwidth into the cloud. Denaturing, which is an owner-specific reduction in fidelity of video content to preserve privacy, is one form of analytics on cloudlets. Content-based indexing for search is another form of cloudlet-based analytics. This article is part of a special issue on smart spaces.},
  author       = {Satyanarayanan, Mahadev and Simoens, Pieter and Xiao, Yu and Pillai, Padmanabhan and Chen, Zhuo and Ha, Kiryong and Hu, Wenlu and Amos, Brandon},
  publisher    = {IEEE},
  date         = {2015},
  journaltitle = {IEEE Pervasive Computing},
  number       = {2},
  pages        = {24--31},
  title        = {Edge Analytics in the {Internet of Things}},
  volume       = {14}
}

@article{schmidhuber2015deep,
  author       = {Schmidhuber, J{\"{u}}rgen},
  publisher    = {Elsevier},
  date         = {2015},
  journaltitle = {Neural Networks},
  pages        = {85--117},
  title        = {Deep learning in neural networks: An overview},
  volume       = {61}
}

@inproceedings{schulman2016trpogae,
  author    = {Schulman, John and Moritz, Philipp and Levine, Sergey and Jordan, Michael I. and Abbeel, Pieter},
  booktitle = {International Conference on Learning Representations},
  date      = {2016},
  title     = {High-Dimensional Continuous Control Using Generalized Advantage Estimation}
}

@online{schulman2017proximal,
  author     = {Schulman, John and Wolski, Filip and Dhariwal, Prafulla and Radford, Alec and Klimov, Oleg},
  date       = {2017},
  eprint     = {1707.06347},
  eprinttype = {arXiv},
  title      = {Proximal policy optimization algorithms}
}

@online{shen2018ordered,
  author     = {Shen, Yikang and Tan, Shawn and Sordoni, Alessandro and Courville, Aaron},
  date       = {2018},
  eprint     = {1810.09536},
  eprinttype = {arXiv},
  title      = {Ordered Neurons: Integrating Tree Structures into Recurrent Neural Networks}
}

@article{silver2016mastering,
  author       = {Silver, David and Huang, Aja and Maddison, Chris J and Guez, Arthur and Sifre, Laurent and van den Driessche, George and Schrittwieser, Julian and Antonoglou, Ioannis and Panneershelvam, Veda and Lanctot, Marc and others},
  publisher    = {Nature Publishing Group},
  date         = {2016},
  journaltitle = {Nature},
  number       = {7587},
  pages        = {484--489},
  title        = {Mastering the game of {Go} with deep neural networks and tree search},
  volume       = {529}
}

@online{silver2016predictron,
  author     = {Silver, David and van Hasselt, Hado and Hessel, Matteo and Schaul, Tom and Guez, Arthur and Harley, Tim and Dulac-Arnold, Gabriel and Reichert, David and Rabinowitz, Neil and Barreto, Andre and others},
  date       = {2016},
  eprint     = {1612.08810},
  eprinttype = {arXiv},
  title      = {The predictron: End-to-end learning and planning}
}

@online{simonyan2014very,
  author     = {Simonyan, Karen and Zisserman, Andrew},
  date       = {2014},
  eprint     = {1409.1556},
  eprinttype = {arXiv},
  title      = {Very deep convolutional networks for large-scale image recognition}
}

@online{srinivas2018universal,
  author     = {Srinivas, Aravind and Jabri, Allan and Abbeel, Pieter and Levine, Sergey and Finn, Chelsea},
  date       = {2018},
  eprint     = {1804.00645},
  eprinttype = {arXiv},
  title      = {Universal Planning Networks}
}

@article{sutton2012introduction,
  author       = {Sutton, Charles and McCallum, Andrew},
  publisher    = {Now Publishers, Inc.},
  date         = {2012},
  journaltitle = {Foundations and Trends{\textregistered} in Machine Learning},
  number       = {4},
  pages        = {267--373},
  title        = {An introduction to conditional random fields},
  volume       = {4}
}

@online{tang2018ba,
  author     = {Tang, Chengzhou and Tan, Ping},
  date       = {2018},
  eprint     = {1806.04807},
  eprinttype = {arXiv},
  title      = {{BA-Net}: Dense bundle adjustment network}
}

@online{tarlow2012fast,
  author     = {Tarlow, Daniel and Swersky, Kevin and Zemel, Richard S and Adams, Ryan Prescott and Frey, Brendan J},
  date       = {2012},
  eprint     = {1210.4899},
  eprinttype = {arXiv},
  title      = {Fast exact inference for recursive cardinality models}
}

@article{theodorou2010generalized,
  author       = {Theodorou, Evangelos and Buchli, Jonas and Schaal, Stefan},
  date         = {2010-11},
  journaltitle = {Journal of Machine Learning Research},
  pages        = {3137--3181},
  title        = {A generalized path integral control approach to reinforcement learning},
  volume       = {11}
}

@misc{tieleman2012lecture,
  author       = {Tieleman, Tijmen and Hinton, Geoffrey},
  date         = {2012},
  howpublished = {COURSERA: Neural Networks for Machine Learning},
  title        = {Lecture 6.5-rmsprop: Divide the gradient by a running average of its recent magnitude}
}

@software{torvalds2005git,
  author = {Torvalds, Linus and Hamano, Junio C. and others},
  date   = {2005},
  title  = {{Git}},
  url    = {http://git-scm.com}
}

@online{tschiatschek2018differentiable,
  author     = {Tschiatschek, Sebastian and Sahin, Aytunc and Krause, Andreas},
  date       = {2018},
  eprint     = {1803.01785},
  eprinttype = {arXiv},
  title      = {Differentiable submodular maximization}
}

@article{tsochantaridis2005large,
  author       = {Tsochantaridis, Ioannis and Joachims, Thorsten and Hofmann, Thomas and Altun, Yasemin},
  title        = {Large margin methods for structured and interdependent output variables},
  journaltitle = {Journal of Machine Learning Research},
  volume       = {6},
  pages        = {1453--1484},
  date         = {2005}
}

@article{tsoumakas2011mulan,
  author       = {Tsoumakas, Grigorios and Spyromitros-Xioufis, Eleftherios and Vilcek, Jozef and Vlahavas, Ioannis},
  date         = {2011-07},
  journaltitle = {Journal of Machine Learning Research},
  pages        = {2411--2414},
  title        = {{Mulan}: A {Java} library for multi-label learning},
  volume       = {12}
}

@article{uhlenbeck1930theory,
  author       = {Uhlenbeck, George E and Ornstein, Leonard S},
  publisher    = {APS},
  date         = {1930},
  journaltitle = {Physical Review},
  number       = {5},
  pages        = {823--841},
  title        = {On the theory of the {Brownian} motion},
  volume       = {36}
}

@article{van2011numpy,
  author       = {van der Walt, St{\'e}fan and Colbert, S. Chris and Varoquaux, Ga{\"e}l},
  publisher    = {IEEE Computer Society},
  date         = {2011},
  journaltitle = {Computing in Science \& Engineering},
  number       = {2},
  pages        = {22--30},
  title        = {The {NumPy} array: a structure for efficient numerical computation},
  volume       = {13}
}

@article{wainwright2008graphical,
  author       = {Wainwright, Martin J and Jordan, Michael I},
  publisher    = {Now Publishers, Inc.},
  date         = {2008},
  journaltitle = {Foundations and Trends{\textregistered} in Machine Learning},
  number       = {1--2},
  pages        = {1--305},
  title        = {Graphical models, exponential families, and variational inference},
  volume       = {1}
}

@article{wang2008fast,
  author       = {Wang, Yang and Boyd, Stephen},
  title        = {Fast model predictive control using online optimization},
  journaltitle = {IFAC Proceedings Volumes},
  volume       = {41},
  number       = {2},
  pages        = {6974--6979},
  publisher    = {Elsevier},
  date         = {2008}
}

@online{weber2017imagination,
  author     = {Weber, Th{\'{e}}ophane and Racani{\`{e}}re, S{\'{e}}bastien and Reichert, David P and Buesing, Lars and Guez, Arthur and Rezende, Danilo Jimenez and Badia, Adria Puigdom{\`{e}}nech and Vinyals, Oriol and Heess, Nicolas and Li, Yujia and others},
  date       = {2017},
  eprint     = {1707.06203},
  eprinttype = {arXiv},
  title      = {Imagination-Augmented Agents for Deep Reinforcement Learning}
}

@article{williams2017model,
  author       = {Williams, Grady and Aldrich, Andrew and Theodorou, Evangelos A},
  title        = {Model predictive path integral control: From theory to parallel computation},
  journaltitle = {Journal of Guidance, Control, and Dynamics},
  volume       = {40},
  number       = {2},
  pages        = {344--357},
  publisher    = {American Institute of Aeronautics and Astronautics},
  date         = {2017}
}

@inproceedings{xinyi2018capsule,
  author    = {Xinyi, Zhang and Chen, Lihui},
  booktitle = {International Conference on Learning Representations},
  date      = {2019},
  title     = {Capsule Graph Neural Network}
}

@online{xu2018powerful,
  author     = {Xu, Keyulu and Hu, Weihua and Leskovec, Jure and Jegelka, Stefanie},
  date       = {2018},
  eprint     = {1810.00826},
  eprinttype = {arXiv},
  title      = {How Powerful are Graph Neural Networks?}
}

@article{yang2017support,
  author       = {Yang, Michael Ying and Liao, Wentong and Ackermann, Hanno and Rosenhahn, Bodo},
  publisher    = {Elsevier},
  date         = {2017},
  journaltitle = {{ISPRS} Journal of Photogrammetry and Remote Sensing},
  pages        = {15--25},
  title        = {On support relations and semantic scene graphs},
  volume       = {131}
}

@article{ye1994nl,
  author       = {Ye, Yinyu and Todd, Michael J and Mizuno, Shinji},
  publisher    = {INFORMS},
  date         = {1994},
  journaltitle = {Mathematics of Operations Research},
  number       = {1},
  pages        = {53--67},
  title        = {An {$O(\sqrt{nL})$}-iteration homogeneous and self-dual linear programming algorithm},
  volume       = {19}
}

@online{yosinski2015understanding,
  author     = {Yosinski, Jason and Clune, Jeff and Nguyen, Anh and Fuchs, Thomas and Lipson, Hod},
  date       = {2015},
  eprint     = {1506.06579},
  eprinttype = {arXiv},
  title      = {Understanding neural networks through deep visualization}
}

@online{yu2015multi,
  author     = {Yu, Fisher and Koltun, Vladlen},
  date       = {2015},
  eprint     = {1511.07122},
  eprinttype = {arXiv},
  title      = {Multi-scale context aggregation by dilated convolutions}
}

@book{ben2001lectures,
  author    = {Ben-Tal, Ahron and Nemirovski, Arkadi},
  publisher = {SIAM},
  date      = {2001},
  title     = {Lectures on modern convex optimization: analysis, algorithms, and engineering applications},
  volume    = {2}
}

@book{bertsekas1999nonlinear,
  author    = {Bertsekas, Dimitri P},
  publisher = {Athena Scientific},
  location  = {Belmont, MA},
  date      = {1999},
  title     = {Nonlinear programming}
}

@book{bertsekas2005dynamic,
  author    = {Bertsekas, Dimitri P},
  publisher = {Athena Scientific},
  location  = {Belmont, MA},
  date      = {2005},
  edition   = {3},
  title     = {Dynamic programming and optimal control},
  volume    = {1}
}

@book{bishop2007pattern,
  author    = {Bishop, Christopher},
  publisher = {Springer},
  location  = {New York},
  date      = {2007},
  series    = {Information Science and Statistics},
  note      = {1st edition 2006, corrected 2nd printing},
  title     = {Pattern Recognition and Machine Learning}
}

@book{bonnans2013perturbation,
  author    = {Bonnans, J Fr{\'{e}}d{\'{e}}ric and Shapiro, Alexander},
  title     = {Perturbation analysis of optimization problems},
  publisher = {Springer Science \& Business Media},
  date      = {2013}
}

@book{boyd2004convex,
  author    = {Boyd, Stephen and Vandenberghe, Lieven},
  publisher = {Cambridge University Press},
  date      = {2004},
  title     = {Convex optimization}
}

@book{friedman2001elements,
  author    = {Friedman, Jerome and Hastie, Trevor and Tibshirani, Robert},
  publisher = {Springer},
  location  = {New York, NY, USA},
  series    = {Springer Series in Statistics},
  date      = {2001},
  title     = {The elements of statistical learning},
  volume    = {1}
}

@book{goodfellow2016deep,
  author    = {Goodfellow, Ian and Bengio, Yoshua and Courville, Aaron},
  publisher = {MIT Press},
  location  = {Cambridge},
  date      = {2016},
  title     = {Deep learning},
  volume    = {1}
}

@book{griewank2008evaluating,
  author    = {Griewank, Andreas and Walther, Andrea},
  title     = {Evaluating derivatives: principles and techniques of algorithmic differentiation},
  publisher = {SIAM},
  date      = {2008}
}

@book{koller2009probabilistic,
  author    = {Koller, Daphne and Friedman, Nir},
  publisher = {MIT Press},
  date      = {2009},
  title     = {Probabilistic graphical models: principles and techniques}
}

@book{lamport1994latex,
  author    = {Lamport, Leslie},
  publisher = {Addison-Wesley},
  date      = {1994},
  title     = {{\LaTeX}: a document preparation system: user's guide and reference manual}
}

@book{mckinney2012python,
  author    = {McKinney, Wes},
  publisher = {O'Reilly Media, Inc.},
  date      = {2012},
  title     = {{Python} for data analysis: Data wrangling with {Pandas}, {NumPy}, and {IPython}}
}

@book{nocedal2006sequential,
  author    = {Nocedal, Jorge and Wright, Stephen J},
  title     = {Sequential quadratic programming},
  publisher = {Springer},
  date      = {2006}
}

@book{oliphant2006guide,
  author    = {Oliphant, Travis E},
  publisher = {Trelgol Publishing},
  location  = {USA},
  date      = {2006},
  title     = {A guide to {NumPy}},
  volume    = {1}
}

@book{rockafellar2009variational,
  author    = {Rockafellar, R Tyrrell and Wets, Roger J-B},
  title     = {Variational analysis},
  volume    = {317},
  publisher = {Springer Science \& Business Media},
  date      = {2009}
}

@book{sastry2011adaptive,
  author    = {Sastry, Shankar and Bodson, Marc},
  title     = {Adaptive control: stability, convergence and robustness},
  publisher = {Courier Corporation},
  date      = {2011}
}

@book{sra2012optimization,
  author    = {Sra, Suvrit and Nowozin, Sebastian and Wright, Stephen J},
  publisher = {MIT Press},
  date      = {2012},
  title     = {Optimization for machine learning}
}

@article{stallman1981emacs,
  author       = {Stallman, Richard M},
  publisher    = {ACM},
  date         = {1981},
  journaltitle = {ACM SIGPLAN Notices},
  number       = {6},
  pages        = {147--156},
  title        = {{EMACS} the extensible, customizable self-documenting display editor},
  volume       = {16}
}

@book{sutton1998reinforcement,
  author    = {Sutton, Richard S and Barto, Andrew G},
  publisher = {MIT Press},
  date      = {1998},
  title     = {Reinforcement learning: An introduction}
}

@book{szeliski2010computer,
  author    = {Szeliski, Richard},
  title     = {Computer vision: algorithms and applications},
  publisher = {Springer Science \& Business Media},
  date      = {2010}
}

@book{van1995python,
  author    = {van Rossum, Guido and Drake, Jr., Fred L.},
  publisher = {Centrum voor Wiskunde en Informatica},
  location  = {Amsterdam},
  date      = {1995},
  title     = {{Python} reference manual}
}

@book{wasserman2013all,
  author    = {Wasserman, Larry},
  title     = {All of statistics: a concise course in statistical inference},
  publisher = {Springer Science \& Business Media},
  date      = {2013}
}

@book{wright1997primal,
  author    = {Wright, Stephen J},
  publisher = {SIAM},
  date      = {1997},
  title     = {Primal-dual interior-point methods}
}

@incollection{bell2008algorithmic,
  author    = {Bell, Bradley M and Burke, James V},
  title     = {Algorithmic differentiation of implicit functions and optimal values},
  booktitle = {Advances in Automatic Differentiation},
  pages     = {67--77},
  publisher = {Springer},
  date      = {2008}
}

@incollection{grant2006disciplined,
  author    = {Grant, Michael and Boyd, Stephen and Ye, Yinyu},
  publisher = {Springer},
  booktitle = {Global Optimization},
  date      = {2006},
  pages     = {155--210},
  title     = {Disciplined convex programming}
}

@incollection{smola2007bundle,
  author    = {Smola, Alex J. and Vishwanathan, S. V. N. and Le, Quoc V.},
  editor    = {Platt, J. C. and Koller, D. and Singer, Y. and Roweis, S. T.},
  publisher = {Curran Associates, Inc.},
  booktitle = {Advances in Neural Information Processing Systems 20},
  date      = {2008},
  pages     = {1377--1384},
  title     = {Bundle Methods for Machine Learning}
}

@inproceedings{abadi2016tensorflow,
  author    = {Abadi, Mart{\'\i}n and Barham, Paul and Chen, Jianmin and Chen, Zhifeng and Davis, Andy and Dean, Jeffrey and Devin, Matthieu and Ghemawat, Sanjay and Irving, Geoffrey and Isard, Michael and others},
  booktitle = {12th {USENIX} Symposium on Operating Systems Design and Implementation ({OSDI} 16)},
  date      = {2016},
  pages     = {265--283},
  title     = {{TensorFlow}: a system for large-scale machine learning}
}

@inproceedings{abbeel2006using,
  author       = {Abbeel, Pieter and Quigley, Morgan and Ng, Andrew Y},
  organization = {ACM},
  booktitle    = {Proceedings of the 23rd International Conference on Machine Learning},
  date         = {2006},
  pages        = {1--8},
  title        = {Using inaccurate models in reinforcement learning}
}

@inproceedings{agarwal2011infinite,
  author       = {Agarwal, Shivani},
  title        = {The infinite push: A new support vector ranking algorithm that directly optimizes accuracy at the absolute top of the list},
  booktitle    = {Proceedings of the 2011 SIAM International Conference on Data Mining},
  pages        = {839--850},
  organization = {SIAM},
  date         = {2011}
}

@inproceedings{alexis2011quadrotor,
  author       = {Alexis, Kostas and Papachristos, Christos and Nikolakopoulos, George and Tzes, Anthony},
  organization = {IEEE},
  booktitle    = {19th Mediterranean Conference on Control \& Automation ({MED})},
  date         = {2011},
  pages        = {1247--1252},
  title        = {Model predictive quadrotor indoor position control}
}

@inproceedings{ali2017semismooth,
  author    = {Ali, Alnur and Wong, Eric and Kolter, J. Zico},
  publisher = {JMLR.org},
  booktitle = {Proceedings of the 34th International Conference on Machine Learning},
  series    = {Proceedings of Machine Learning Research},
  volume    = {70},
  date      = {2017},
  pages     = {70--79},
  title     = {A semismooth {Newton} method for fast, generic convex programming}
}

@inproceedings{amos2017input,
  author    = {Amos, Brandon and Xu, Lei and Kolter, J. Zico},
  title     = {Input Convex Neural Networks},
  booktitle = {Proceedings of the International Conference on Machine Learning},
  date      = {2017}
}

@inproceedings{amos2017optnet,
  author    = {Amos, Brandon and Kolter, J. Zico},
  booktitle = {Proceedings of the International Conference on Machine Learning},
  date      = {2017},
  title     = {{OptNet}: Differentiable Optimization as a Layer in Neural Networks}
}

@inproceedings{amos2018differentiable,
  author    = {Amos, Brandon and Jimenez, Ivan and Sacks, Jacob and Boots, Byron and Kolter, J. Zico},
  booktitle = {Advances in Neural Information Processing Systems},
  date      = {2018},
  pages     = {8299--8310},
  title     = {Differentiable {MPC} for End-to-end Planning and Control}
}

@inproceedings{amos2018learning,
  abstract  = {We consider the setting of an agent with a fixed body interacting with an unknown and uncertain external world. We show that models trained to predict proprioceptive information about the agent's body come to represent objects in the external world. In spite of being trained with only internally available signals, these dynamic body models come to represent external objects through the necessity of predicting their effects on the agent's own body. That is, the model learns holistic persistent representations of objects in the world, even though the only training signals are body signals. Our dynamics model is able to successfully predict distributions over 132 sensor readings over 100 steps into the future and we demonstrate that even when the body is no longer in contact with an object, the latent variables of the dynamics model continue to represent its shape. We show that active data collection by maximizing the entropy of predictions about the body---touch sensors, proprioception and vestibular information---leads to learning of dynamic models that show superior performance when used for control. We also collect data from a real robotic hand and show that the same models can be used to answer questions about properties of objects in the real world. Videos with qualitative results of our models are available at \url{https://goo.gl/mZuqAV}.},
  author    = {Amos, Brandon and Dinh, Laurent and Cabi, Serkan and Roth{\"{o}}rl, Thomas and Colmenarejo, Sergio G{\'{o}}mez and Muldal, Alistair and Erez, Tom and Tassa, Yuval and de Freitas, Nando and Denil, Misha},
  booktitle = {International Conference on Learning Representations},
  date      = {2018},
  title     = {Learning Awareness Models}
}

@unpublished{amos2019lml,
  author = {Amos, Brandon and Koltun, Vladlen and Kolter, J. Zico},
  date   = {2019},
  note   = {Unpublished},
  title  = {The Limited Multi-Label Projection Layer}
}

@inproceedings{bagnell2001autonomous,
  author       = {Bagnell, J Andrew and Schneider, Jeff G},
  organization = {IEEE},
  booktitle    = {Proceedings of the 2001 {IEEE} International Conference on Robotics and Automation ({ICRA})},
  date         = {2001},
  pages        = {1615--1620},
  title        = {Autonomous helicopter control using reinforcement learning policy search methods},
  volume       = {2}
}

@inproceedings{barron2016fast,
  author    = {Barron, Jonathan T and Poole, Ben},
  publisher = {Springer},
  booktitle = {European Conference on Computer Vision},
  date      = {2016},
  pages     = {617--632},
  title     = {The fast bilateral solver}
}

@inproceedings{battaglia2016interaction,
  author    = {Battaglia, Peter and Pascanu, Razvan and Lai, Matthew and Rezende, Danilo Jimenez and others},
  booktitle = {Advances in Neural Information Processing Systems},
  date      = {2016},
  pages     = {4502--4510},
  title     = {Interaction networks for learning about objects, relations and physics}
}

@inproceedings{belanger2016structured,
  author    = {Belanger, David and McCallum, Andrew},
  title     = {Structured prediction energy networks},
  booktitle = {Proceedings of the International Conference on Machine Learning},
  date      = {2016}
}

@inproceedings{belanger2017end,
  author    = {Belanger, David and Yang, Bishan and McCallum, Andrew},
  title     = {End-to-End Learning for Structured Prediction Energy Networks},
  booktitle = {Proceedings of the International Conference on Machine Learning},
  date      = {2017}
}

@inproceedings{bengio1994globally,
  author    = {Bengio, Yoshua and LeCun, Yann and Henderson, Donnie},
  booktitle = {Advances in Neural Information Processing Systems},
  date      = {1994},
  pages     = {937--944},
  title     = {Globally trained handwritten word recognizer using spatial representation, convolutional neural networks, and hidden {Markov} models}
}

@inproceedings{berrada2018smooth,
  author    = {Berrada, Leonard and Zisserman, Andrew and Kumar, M Pawan},
  title     = {Smooth Loss Functions for Deep Top-k Classification},
  booktitle = {Proceedings of the International Conference on Learning Representations},
  date      = {2018}
}

@inproceedings{boedecker2014sparsegps,
  author    = {Boedecker, Joschka and Springenberg, Jost Tobias and W{\"u}lfing, Jan and Riedmiller, Martin},
  booktitle = {IEEE Symposium on Adaptive Dynamic Programming and Reinforcement Learning (ADPRL)},
  date      = {2014},
  title     = {Approximate Real-Time Optimal Control Based on Sparse {Gaussian} Process Models}
}

@inproceedings{bouffard2012lbmpc,
  author    = {Bouffard, P. and Aswani, A. and Tomlin, C.},
  title     = {Learning-based model predictive control on a quadrotor: Onboard implementation and experimental results},
  booktitle = {IEEE International Conference on Robotics and Automation},
  date      = {2012}
}

@inproceedings{boyd2012accuracy,
  author    = {Boyd, Stephen and Cortes, Corinna and Mohri, Mehryar and Radovanovic, Ana},
  booktitle = {Advances in Neural Information Processing Systems},
  date      = {2012},
  pages     = {953--961},
  title     = {Accuracy at the top}
}

@inproceedings{chen2015early,
  abstract  = {A cognitive assistance application combines a wearable device such as Google Glass with cloudlet processing to provide step-by-step guidance on a complex task. In this paper, we focus on user assistance for narrow and well-defined tasks that require specialized knowledge and/or skills. We describe proof-of-concept implementations for four different tasks: assembling 2D Lego models, freehand sketching, playing ping-pong, and recommending context-relevant YouTube tutorials. We then reflect on the difficulties we faced in building these applications, and suggest future research that could simplify the creation of similar applications.},
  author    = {Chen, Zhuo and Jiang, Lu and Hu, Wenlu and Ha, Kiryong and Amos, Brandon and Pillai, Padmanabhan and Hauptmann, Alex and Satyanarayanan, Mahadev},
  booktitle = {WearSys},
  date      = {2015},
  title     = {Early Implementation Experience with Wearable Cognitive Assistance Applications}
}

@inproceedings{chen2015learning,
  author    = {Chen, Liang-Chieh and Schwing, Alexander G and Yuille, Alan L and Urtasun, Raquel},
  title     = {Learning deep structured models},
  booktitle = {Proceedings of the International Conference on Machine Learning},
  date      = {2015}
}

@inproceedings{chen2017empirical,
  author       = {Chen, Zhuo and Hu, Wenlu and Wang, Junjue and Zhao, Siyan and Amos, Brandon and Wu, Guanhang and Ha, Kiryong and Elgazzar, Khalid and Pillai, Padmanabhan and Klatzky, Roberta and Siewiorek, Daniel and Satyanarayanan, Mahadev},
  title        = {An Empirical Study of Latency in an Emerging Class of Edge Computing Applications for Wearable Cognitive Assistance},
  booktitle    = {Proceedings of the Second ACM/IEEE Symposium on Edge Computing},
  pages        = {12},
  organization = {ACM},
  date         = {2017}
}

@inproceedings{chen2017learning,
  author    = {Chen, Yutian and Hoffman, Matthew W and Colmenarejo, Sergio G{\'{o}}mez and Denil, Misha and Lillicrap, Timothy P and Botvinick, Matt and de Freitas, Nando},
  booktitle = {International Conference on Machine Learning},
  date      = {2017},
  pages     = {748--756},
  title     = {Learning to learn without gradient descent by gradient descent}
}

@inproceedings{chen2018approximating,
  author       = {Chen, Steven and Saulnier, Kelsey and Atanasov, Nikolay and Lee, Daniel D and Kumar, Vijay and Pappas, George J and Morari, Manfred},
  title        = {Approximating Explicit Model Predictive Control Using Constrained Neural Networks},
  booktitle    = {2018 Annual American Control Conference (ACC)},
  pages        = {1520--1527},
  organization = {IEEE},
  date         = {2018}
}

@inproceedings{chen2018neural,
  author    = {Chen, Tian Qi and Rubanova, Yulia and Bettencourt, Jesse and Duvenaud, David K},
  title     = {Neural ordinary differential equations},
  booktitle = {Advances in Neural Information Processing Systems},
  pages     = {6572--6583},
  date      = {2018}
}

@inproceedings{collobert2011torch7,
  author    = {Collobert, Ronan and Kavukcuoglu, Koray and Farabet, Cl{\'{e}}ment},
  booktitle = {{BigLearn}, {NIPS} Workshop},
  date      = {2011},
  number    = {EPFL-CONF-192376},
  title     = {{Torch7}: A {Matlab}-like environment for machine learning}
}

@inproceedings{davies2016privacy,
  abstract  = {Unease over data privacy will retard consumer acceptance of IoT deployments. The primary source of discomfort is a lack of user control over raw data that is streamed directly from sensors to the cloud. This is a direct consequence of the over-centralization of today's cloud-based IoT hub designs. We propose a solution that interposes a locally-controlled software component called a privacy mediator on every raw sensor stream. Each mediator is in the same administrative domain as the sensors whose data is being collected, and dynamically enforces the current privacy policies of the owners of the sensors or mobile users within the domain. This solution necessitates a logical point of presence for mediators within the administrative boundaries of each organization. Such points of presence are provided by cloudlets, which are small locally-administered data centers at the edge of the Internet that can support code mobility. The use of cloudlet-based mediators aligns well with natural personal and organizational boundaries of trust and responsibility.},
  author    = {Davies, Nigel Andrew Justin and Taft, Nina and Satyanarayanan, Mahadev and Clinch, Sarah and Amos, Brandon},
  booktitle = {HotMobile},
  date      = {2016},
  title     = {Privacy mediators: helping {IoT} cross the chasm}
}

@inproceedings{de2018end,
  author    = {de Avila Belbute-Peres, Filipe and Smith, Kevin and Allen, Kelsey and Tenenbaum, Josh and Kolter, J. Zico},
  title     = {End-to-end differentiable physics for learning and control},
  booktitle = {Advances in Neural Information Processing Systems},
  pages     = {7178--7189},
  date      = {2018}
}

@inproceedings{deisenroth2011pilco,
  author    = {Deisenroth, Marc and Rasmussen, Carl E},
  booktitle = {Proceedings of the 28th International Conference on Machine Learning ({ICML}-11)},
  date      = {2011},
  pages     = {465--472},
  title     = {{PILCO}: A model-based and data-efficient approach to policy search}
}

@inproceedings{devlin2017robustfill,
  author       = {Devlin, Jacob and Uesato, Jonathan and Bhupatiraju, Surya and Singh, Rishabh and Mohamed, Abdel-rahman and Kohli, Pushmeet},
  title        = {{RobustFill}: Neural program learning under noisy {I/O}},
  booktitle    = {Proceedings of the 34th International Conference on Machine Learning},
  series       = {Proceedings of Machine Learning Research},
  volume       = {70},
  pages        = {990--998},
  organization = {JMLR.org},
  date         = {2017}
}

@inproceedings{djolonga2017differentiable,
  author    = {Djolonga, Josip and Krause, Andreas},
  title     = {Differentiable learning of submodular models},
  booktitle = {Advances in Neural Information Processing Systems},
  pages     = {1013--1023},
  date      = {2017}
}

@inproceedings{domahidi2013ecos,
  author       = {Domahidi, Alexander and Chu, Eric and Boyd, Stephen},
  title        = {{ECOS}: An {SOCP} solver for embedded systems},
  booktitle    = {2013 European Control Conference (ECC)},
  pages        = {3071--3076},
  organization = {IEEE},
  date         = {2013}
}

@inproceedings{domke2012generic,
  author    = {Domke, Justin},
  title     = {Generic Methods for Optimization-Based Modeling},
  booktitle = {AISTATS},
  volume    = {22},
  pages     = {318--326},
  date      = {2012}
}

@inproceedings{donti2017task,
  author    = {Donti, Priya L and Amos, Brandon and Kolter, J. Zico},
  title     = {Task-based End-to-end Model Learning in Stochastic Optimization},
  booktitle = {Advances in Neural Information Processing Systems},
  date      = {2017},
  abstract  = {As machine learning techniques have become more ubiquitous, it has become common to see machine learning prediction algorithms operating within some larger process. However, the criteria by which we train machine learning algorithms often differ from the ultimate criteria on which we evaluate them. This paper proposes an end-to-end approach for learning probabilistic machine learning models within the context of stochastic programming, in a manner that directly captures the ultimate task-based objective for which they will be used. We then present two experimental evaluations of the proposed approach, one as applied to a generic inventory stock problem and the second to a real-world electrical grid scheduling task. In both cases, we show that the proposed approach can outperform both a traditional modeling approach and a purely black-box policy optimization approach.}
}

@inproceedings{duchi2008efficient,
  author    = {Duchi, John and Shalev-Shwartz, Shai and Singer, Yoram and Chandra, Tushar},
  title     = {Efficient projections onto the {$\ell_1$}-ball for learning in high dimensions},
  booktitle = {Proceedings of the 25th International Conference on Machine Learning},
  pages     = {272--279},
  date      = {2008}
}

% NOTE(review): author order follows the published IROS 2012 paper (Tassa first);
% the citation key is kept unchanged so existing citations still resolve.
@inproceedings{erez2012mpc,
  author    = {Tassa, Yuval and Erez, Tom and Todorov, Emanuel},
  title     = {Synthesis and stabilization of complex behaviors through online trajectory optimization},
  booktitle = {2012 IEEE/RSJ International Conference on Intelligent Robots and Systems},
  date      = {2012}
}

@inproceedings{finn2016guided,
  author    = {Finn, Chelsea and Levine, Sergey and Abbeel, Pieter},
  title     = {Guided cost learning: Deep inverse optimal control via policy optimization},
  booktitle = {International Conference on Machine Learning},
  pages     = {49--58},
  date      = {2016}
}

@inproceedings{finn2017model,
  author       = {Finn, Chelsea and Abbeel, Pieter and Levine, Sergey},
  title        = {Model-agnostic meta-learning for fast adaptation of deep networks},
  booktitle    = {Proceedings of the 34th International Conference on Machine Learning},
  series       = {Proceedings of Machine Learning Research},
  volume       = {70},
  pages        = {1126--1135},
  organization = {JMLR.org},
  date         = {2017}
}

@inproceedings{foerster2018learning,
  author       = {Foerster, Jakob and Chen, Richard Y and Al-Shedivat, Maruan and Whiteson, Shimon and Abbeel, Pieter and Mordatch, Igor},
  title        = {Learning with opponent-learning awareness},
  booktitle    = {Proceedings of the 17th International Conference on Autonomous Agents and MultiAgent Systems},
  pages        = {122--130},
  organization = {International Foundation for Autonomous Agents and Multiagent Systems},
  date         = {2018}
}

@inproceedings{globerson2016collective,
  author    = {Globerson, Amir and Lazic, Nevena and Chakrabarti, Soumen and Subramanya, Amarnag and Ringaard, Michael and Pereira, Fernando},
  title     = {Collective entity resolution with multi-focal attention},
  booktitle = {Proceedings of the 54th Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)},
  volume    = {1},
  pages     = {621--631},
  date      = {2016}
}

@inproceedings{glorot2011deep,
  author    = {Glorot, Xavier and Bordes, Antoine and Bengio, Yoshua},
  title     = {Deep sparse rectifier neural networks},
  booktitle = {Proceedings of the Fourteenth International Conference on Artificial Intelligence and Statistics},
  pages     = {315--323},
  date      = {2011}
}

@inproceedings{goodfellow2013multi,
  author    = {Goodfellow, Ian and Mirza, Mehdi and Courville, Aaron and Bengio, Yoshua},
  title     = {Multi-prediction deep {Boltzmann} machines},
  booktitle = {Advances in Neural Information Processing Systems},
  pages     = {548--556},
  date      = {2013}
}

@inproceedings{goodfellow2014generative,
  author    = {Goodfellow, Ian and Pouget-Abadie, Jean and Mirza, Mehdi and Xu, Bing and Warde-Farley, David and Ozair, Sherjil and Courville, Aaron and Bengio, Yoshua},
  title     = {Generative adversarial nets},
  booktitle = {Advances in Neural Information Processing Systems},
  pages     = {2672--2680},
  date      = {2014}
}

@inproceedings{goyal2018continuous,
  author    = {Goyal, Kartik and Neubig, Graham and Dyer, Chris and Berg-Kirkpatrick, Taylor},
  title     = {A continuous relaxation of beam search for end-to-end training of neural sequence models},
  booktitle = {Thirty-Second AAAI Conference on Artificial Intelligence},
  date      = {2018}
}

@inproceedings{gu2016continuous,
  author    = {Gu, Shixiang and Lillicrap, Timothy and Sutskever, Ilya and Levine, Sergey},
  title     = {Continuous Deep {Q-Learning} with Model-based Acceleration},
  booktitle = {Proceedings of the International Conference on Machine Learning},
  date      = {2016}
}

@inproceedings{ha2017you,
  author       = {Ha, Kiryong and Abe, Yoshihisa and Eiszler, Thomas and Chen, Zhuo and Hu, Wenlu and Amos, Brandon and Upadhyaya, Rohit and Pillai, Padmanabhan and Satyanarayanan, Mahadev},
  title        = {You can teach elephants to dance: agile {VM} handoff for edge computing},
  booktitle    = {Proceedings of the Second ACM/IEEE Symposium on Edge Computing},
  pages        = {12},
  organization = {ACM},
  date         = {2017}
}

@inproceedings{hamilton2017inductive,
  author    = {Hamilton, Will and Ying, Zhitao and Leskovec, Jure},
  title     = {Inductive representation learning on large graphs},
  booktitle = {Advances in Neural Information Processing Systems},
  pages     = {1024--1034},
  date      = {2017}
}

@inproceedings{heess2015learning,
  author    = {Heess, Nicolas and Wayne, Gregory and Silver, David and Lillicrap, Tim and Erez, Tom and Tassa, Yuval},
  title     = {Learning continuous control policies by stochastic value gradients},
  booktitle = {Advances in Neural Information Processing Systems},
  pages     = {2944--2952},
  date      = {2015}
}

@inproceedings{henderson2018deep,
  author    = {Henderson, Peter and Islam, Riashat and Bachman, Philip and Pineau, Joelle and Precup, Doina and Meger, David},
  title     = {Deep reinforcement learning that matters},
  booktitle = {Thirty-Second AAAI Conference on Artificial Intelligence},
  date      = {2018}
}

@inproceedings{hu2014case,
  author    = {Hu, Wenlu and Amos, Brandon and Chen, Zhuo and Ha, Kiryong and Richter, Wolfgang and Pillai, Padmanabhan and Gilbert, Benjamin and Harkes, Jan and Satyanarayanan, Mahadev},
  title     = {The Case for Offload Shaping},
  booktitle = {HotMobile},
  date      = {2015},
  abstract  = {When offloading computation from a mobile device, we show that it can pay to perform additional on-device work in order to reduce the offloading workload. We call this offload shaping, and demonstrate its application at many different levels of abstraction using a variety of techniques. We show that offload shaping can produce significant reduction in resource demand, with little loss of application-level fidelity}
}

@inproceedings{hu2016quantifying,
  author       = {Hu, Wenlu and Gao, Ying and Ha, Kiryong and Wang, Junjue and Amos, Brandon and Chen, Zhuo and Pillai, Padmanabhan and Satyanarayanan, Mahadev},
  title        = {Quantifying the impact of edge computing on mobile applications},
  booktitle    = {Proceedings of the 7th ACM SIGOPS Asia-Pacific Workshop on Systems},
  pages        = {5},
  organization = {ACM},
  date         = {2016}
}

@inproceedings{huang2017densely,
  author       = {Huang, Gao and Liu, Zhuang and Van Der Maaten, Laurens and Weinberger, Kilian Q},
  title        = {Densely connected convolutional networks},
  booktitle    = {2017 IEEE Conference on Computer Vision and Pattern Recognition (CVPR)},
  pages        = {2261--2269},
  organization = {IEEE},
  date         = {2017}
}

@inproceedings{ioffe2015batch,
  author    = {Ioffe, Sergey and Szegedy, Christian},
  title     = {Batch Normalization: Accelerating Deep Network Training by Reducing Internal Covariate Shift},
  booktitle = {Proceedings of The 32nd International Conference on Machine Learning},
  pages     = {448--456},
  date      = {2015}
}

@inproceedings{johnson2015image,
  author    = {Johnson, Justin and Krishna, Ranjay and Stark, Michael and Li, Li-Jia and Shamma, David and Bernstein, Michael and Fei-Fei, Li},
  title     = {Image retrieval using scene graphs},
  booktitle = {Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition},
  pages     = {3668--3678},
  date      = {2015}
}

@inproceedings{johnson2016composing,
  author    = {Johnson, Matthew and Duvenaud, David K and Wiltschko, Alex and Adams, Ryan P and Datta, Sandeep R},
  title     = {Composing graphical models with neural networks for structured representations and fast inference},
  booktitle = {Advances in Neural Information Processing Systems},
  pages     = {2946--2954},
  date      = {2016}
}

@inproceedings{kamel2015multicopter,
  author       = {Kamel, Mina and Alexis, Kostas and Achtelik, Markus and Siegwart, Roland},
  title        = {Fast nonlinear model predictive control for multicopter attitude tracking on {$SO(3)$}},
  booktitle    = {2015 IEEE Conference on Control Applications (CCA)},
  pages        = {1160--1166},
  organization = {IEEE},
  date         = {2015}
}

@inproceedings{karkus2017qmdp,
  author    = {Karkus, Peter and Hsu, David and Lee, Wee Sun},
  title     = {{QMDP}-net: Deep learning for planning under partial observability},
  booktitle = {Advances in Neural Information Processing Systems},
  pages     = {4697--4707},
  date      = {2017}
}

@inproceedings{kluyver2016jupyter,
  author    = {Kluyver, Thomas and Ragan-Kelley, Benjamin and P{\'{e}}rez, Fernando and Granger, Brian E and Bussonnier, Matthias and Frederic, Jonathan and Kelley, Kyle and Hamrick, Jessica B and Grout, Jason and Corlay, Sylvain and others},
  title     = {{Jupyter} Notebooks -- a publishing format for reproducible computational workflows},
  booktitle = {ELPUB},
  pages     = {87--90},
  date      = {2016}
}

@inproceedings{kong2015kinematic,
  author       = {Kong, Jason and Pfeiffer, Mark and Schildbach, Georg and Borrelli, Francesco},
  title        = {Kinematic and dynamic vehicle models for autonomous driving control design},
  booktitle    = {2015 IEEE Intelligent Vehicles Symposium (IV)},
  pages        = {1094--1099},
  organization = {IEEE},
  date         = {2015}
}

@inproceedings{krizhevsky2012imagenet,
  author    = {Krizhevsky, Alex and Sutskever, Ilya and Hinton, Geoffrey E},
  title     = {{ImageNet} classification with deep convolutional neural networks},
  booktitle = {Advances in Neural Information Processing Systems},
  pages     = {1097--1105},
  date      = {2012}
}

@inproceedings{kurutach2018learning,
  author    = {Kurutach, Thanard and Tamar, Aviv and Yang, Ge and Russell, Stuart J and Abbeel, Pieter},
  title     = {Learning plannable representations with {Causal InfoGAN}},
  booktitle = {Advances in Neural Information Processing Systems},
  pages     = {8733--8744},
  date      = {2018}
}

@inproceedings{lapin2015top,
  author    = {Lapin, Maksim and Hein, Matthias and Schiele, Bernt},
  title     = {Top-k multiclass {SVM}},
  booktitle = {Advances in Neural Information Processing Systems},
  pages     = {325--333},
  date      = {2015}
}

@inproceedings{lapin2016loss,
  author    = {Lapin, Maksim and Hein, Matthias and Schiele, Bernt},
  title     = {Loss functions for top-k error: Analysis and insights},
  booktitle = {Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition},
  pages     = {1468--1477},
  date      = {2016}
}

@inproceedings{Lenc_2015_CVPR,
  author    = {Lenc, Karel and Vedaldi, Andrea},
  title     = {Understanding Image Representations by Measuring Their Equivariance and Equivalence},
  booktitle = {The IEEE Conference on Computer Vision and Pattern Recognition (CVPR)},
  date      = {2015-06}
}

@inproceedings{lenz2015deepmpc,
  author    = {Lenz, Ian and Knepper, Ross A and Saxena, Ashutosh},
  title     = {{DeepMPC}: Learning Deep Latent Features for Model Predictive Control},
  booktitle = {Robotics: Science and Systems},
  date      = {2015}
}

@inproceedings{levine2013guided,
  author    = {Levine, Sergey and Koltun, Vladlen},
  title     = {Guided policy search},
  booktitle = {Proceedings of the 30th International Conference on Machine Learning (ICML-13)},
  pages     = {1--9},
  date      = {2013}
}

@inproceedings{levine2014learning,
  author    = {Levine, Sergey and Abbeel, Pieter},
  title     = {Learning neural network policies with guided policy search under unknown dynamics},
  booktitle = {Advances in Neural Information Processing Systems},
  pages     = {1071--1079},
  date      = {2014}
}

@inproceedings{li1994markov,
  author       = {Li, Stan Z},
  title        = {{Markov} random field models in computer vision},
  booktitle    = {European Conference on Computer Vision},
  pages        = {361--370},
  organization = {Springer},
  date         = {1994}
}

% NOTE(review): booktitle added because it is a required field for this entry
% type; venue taken from the published paper (ICINCO 2004) -- confirm.
@inproceedings{li2004ilqr,
  author    = {Li, Weiwei and Todorov, Emanuel},
  title     = {Iterative Linear Quadratic Regulator Design for Nonlinear Biological Movement Systems},
  booktitle = {Proceedings of the 1st International Conference on Informatics in Control, Automation and Robotics (ICINCO)},
  date      = {2004}
}

@inproceedings{li2014top,
  author    = {Li, Nan and Jin, Rong and Zhou, Zhi-Hua},
  title     = {Top rank optimization in linear time},
  booktitle = {Advances in Neural Information Processing Systems},
  pages     = {1502--1510},
  date      = {2014}
}

@inproceedings{li2018factorizable,
  author       = {Li, Yikang and Ouyang, Wanli and Zhou, Bolei and Shi, Jianping and Zhang, Chao and Wang, Xiaogang},
  title        = {Factorizable net: an efficient subgraph-based framework for scene graph generation},
  booktitle    = {European Conference on Computer Vision},
  pages        = {346--363},
  organization = {Springer},
  date         = {2018}
}

@inproceedings{liang2017deep,
  author       = {Liang, Xiaodan and Lee, Lisa and Xing, Eric P},
  title        = {Deep variation-structured reinforcement learning for visual relationship and attribute detection},
  booktitle    = {2017 IEEE Conference on Computer Vision and Pattern Recognition (CVPR)},
  pages        = {4408--4417},
  organization = {IEEE},
  date         = {2017}
}

% Journal article: "Optimal Control Applications and Methods" is a journal, and
% the value previously stored in the edition field is its volume number.
@article{liniger2014rccar,
  author  = {Liniger, Alexander and Domahidi, Alexander and Morari, Manfred},
  title   = {Optimization-based autonomous racing of 1:43 scale {RC} cars},
  journal = {Optimal Control Applications and Methods},
  volume  = {36},
  number  = {5},
  pages   = {628--647},
  date    = {2014}
}

% arXiv preprint, so typed as an article with the preprint id in the journal
% field, matching the other arXiv entries at the top of this file.
@article{liting2017driving,
  author  = {Sun, Liting and Peng, Cheng and Zhan, Wei and Tomizuka, Masayoshi},
  title   = {A Fast Integrated Planning and Control Framework for Autonomous Driving via Imitation Learning},
  journal = {arXiv preprint arXiv:1707.02515},
  date    = {2017}
}

@inproceedings{long2015fully,
  author    = {Long, Jonathan and Shelhamer, Evan and Darrell, Trevor},
  title     = {Fully convolutional networks for semantic segmentation},
  booktitle = {Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition},
  pages     = {3431--3440},
  date      = {2015}
}

@inproceedings{mahendran2015understanding,
  author    = {Mahendran, Aravindh and Vedaldi, Andrea},
  title     = {Understanding deep image representations by inverting them},
  booktitle = {Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition},
  pages     = {5188--5196},
  date      = {2015}
}

@inproceedings{martins2016softmax,
  author    = {Martins, Andre and Astudillo, Ramon},
  title     = {From softmax to sparsemax: A sparse model of attention and multi-label classification},
  booktitle = {International Conference on Machine Learning},
  pages     = {1614--1623},
  date      = {2016}
}

@inproceedings{mnih2016asynchronous,
  author    = {Mnih, Volodymyr and Badia, Adria Puigdomenech and Mirza, Mehdi and Graves, Alex and Lillicrap, Timothy and Harley, Tim and Silver, David and Kavukcuoglu, Koray},
  title     = {Asynchronous methods for deep reinforcement learning},
  booktitle = {International Conference on Machine Learning},
  pages     = {1928--1937},
  date      = {2016}
}

@inproceedings{monti2017geometric,
  author    = {Monti, Federico and Boscaini, Davide and Masci, Jonathan and Rodol{\`a}, Emanuele and Svoboda, Jan and Bronstein, Michael M},
  title     = {Geometric deep learning on graphs and manifolds using mixture model {CNNs}},
  booktitle = {Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition},
  pages     = {5115--5124},
  date      = {2017}
}

% arXiv preprint, so typed as an article with the preprint id in the journal
% field, matching the other arXiv entries at the top of this file.
@article{nagabandi2017mbmf,
  author  = {Nagabandi, Anusha and Kahn, Gregory and Fearing, Ronald S. and Levine, Sergey},
  title   = {Neural Network Dynamics for Model-Based Deep Reinforcement Learning with Model-Free Fine-Tuning},
  journal = {arXiv preprint arXiv:1708.02596},
  date    = {2017}
}

@inproceedings{nair2010rectified,
  author    = {Nair, Vinod and Hinton, Geoffrey E},
  title     = {Rectified linear units improve restricted {Boltzmann} machines},
  booktitle = {Proceedings of the 27th International Conference on Machine Learning (ICML-10)},
  pages     = {807--814},
  date      = {2010}
}

@inproceedings{neunert2016slqmpc,
  author    = {Neunert, Michael and de Crousaz, Cedric and Furrer, Fardi and Kamel, Mina and Farshidian, Farbod and Siegwart, Roland and Buchli, Jonas},
  title     = {Fast Nonlinear Model Predictive Control for Unified Trajectory Optimization and Tracking},
  booktitle = {2016 IEEE International Conference on Robotics and Automation (ICRA)},
  date      = {2016}
}

@inproceedings{newell2017pixels,
  author    = {Newell, Alejandro and Deng, Jia},
  title     = {Pixels to graphs by associative embedding},
  booktitle = {Advances in Neural Information Processing Systems},
  pages     = {2171--2180},
  date      = {2017}
}

% NOTE(review): venue, page range and the two-author list follow the published
% ICML 2000 paper; the previous volume/pages values were export artifacts.
@inproceedings{ng2000algorithms,
  author    = {Ng, Andrew Y and Russell, Stuart J},
  title     = {Algorithms for inverse reinforcement learning},
  booktitle = {Proceedings of the Seventeenth International Conference on Machine Learning},
  pages     = {663--670},
  date      = {2000}
}

@inproceedings{niculae2017regularized,
  author    = {Niculae, Vlad and Blondel, Mathieu},
  title     = {A regularized framework for sparse and structured neural attention},
  booktitle = {Advances in Neural Information Processing Systems},
  pages     = {3338--3348},
  date      = {2017}
}

@inproceedings{oh2017value,
  author    = {Oh, Junhyuk and Singh, Satinder and Lee, Honglak},
  title     = {Value prediction network},
  booktitle = {Advances in Neural Information Processing Systems},
  pages     = {6120--6130},
  date      = {2017}
}

@inproceedings{peng2009conditional,
  author    = {Peng, Jian and Bo, Liefeng and Xu, Jinbo},
  title     = {Conditional neural fields},
  booktitle = {Advances in Neural Information Processing Systems},
  pages     = {1419--1427},
  date      = {2009}
}

@inproceedings{pham2018optlayer,
  author       = {Pham, Tu-Hoa and De Magistris, Giovanni and Tachibana, Ryuki},
  title        = {{OptLayer} -- practical constrained optimization for deep reinforcement learning in the real world},
  booktitle    = {2018 IEEE International Conference on Robotics and Automation (ICRA)},
  pages        = {6236--6243},
  organization = {IEEE},
  date         = {2018}
}

@inproceedings{plummer2017phrase,
  author    = {Plummer, Bryan A and Mallya, Arun and Cervantes, Christopher M and Hockenmaier, Julia and Lazebnik, Svetlana},
  title     = {Phrase localization and visual relationship detection with comprehensive image-language cues},
  booktitle = {Proceedings of the IEEE International Conference on Computer Vision},
  date      = {2017}
}

% Conference location and dates are kept in the biblatex venue/eventdate fields
% rather than inside booktitle.
@inproceedings{poon2011sum,
  author    = {Poon, Hoifung and Domingos, Pedro},
  title     = {Sum-product networks: A new deep architecture},
  booktitle = {Proceedings of the Twenty-Seventh Conference on Uncertainty in Artificial Intelligence ({UAI})},
  venue     = {Barcelona, Spain},
  eventdate = {2011-07-14/2011-07-17},
  pages     = {337--346},
  date      = {2011}
}

@inproceedings{ratliff2007approximate,
  author    = {Ratliff, Nathan D and Bagnell, J Andrew and Zinkevich, Martin},
  title     = {(Approximate) Subgradient Methods for Structured Prediction},
  booktitle = {International Conference on Artificial Intelligence and Statistics},
  pages     = {380--387},
  date      = {2007}
}

@inproceedings{regin1996generalized,
  author       = {R{\'{e}}gin, Jean-Charles},
  title        = {Generalized arc consistency for global cardinality constraint},
  booktitle    = {Proceedings of the Thirteenth National Conference on Artificial Intelligence},
  volume       = {1},
  pages        = {209--215},
  organization = {AAAI Press},
  date         = {1996}
}

@inproceedings{ross2011reduction,
  author    = {Ross, St{\'{e}}phane and Gordon, Geoffrey and Bagnell, Drew},
  title     = {A reduction of imitation learning and structured prediction to no-regret online learning},
  booktitle = {Proceedings of the Fourteenth International Conference on Artificial Intelligence and Statistics},
  pages     = {627--635},
  date      = {2011}
}

@inproceedings{rothvoss2014matching,
  author       = {Rothvo{\ss}, Thomas},
  title        = {The matching polytope has exponential extension complexity},
  booktitle    = {Proceedings of the 46th Annual ACM Symposium on Theory of Computing},
  pages        = {263--272},
  organization = {ACM},
  date         = {2014}
}

@inproceedings{sabour2017dynamic,
  author    = {Sabour, Sara and Frosst, Nicholas and Hinton, Geoffrey E},
  title     = {Dynamic routing between capsules},
  booktitle = {Advances in Neural Information Processing Systems},
  pages     = {3856--3866},
  date      = {2017}
}

@inproceedings{samaria1994parameterisation,
  author       = {Samaria, Ferdinando S and Harter, Andy C},
  title        = {Parameterisation of a stochastic model for human face identification},
  booktitle    = {Proceedings of the Second IEEE Workshop on Applications of Computer Vision},
  pages        = {138--142},
  organization = {IEEE},
  date         = {1994}
}

@inproceedings{santa2017deeppermnet,
  author       = {Santa Cruz, Rodrigo and Fernando, Basura and Cherian, Anoop and Gould, Stephen},
  title        = {{DeepPermNet}: Visual permutation learning},
  booktitle    = {2017 IEEE Conference on Computer Vision and Pattern Recognition (CVPR)},
  pages        = {6044--6052},
  organization = {IEEE},
  date         = {2017}
}

@inproceedings{schmidt2014shrinkage,
  author    = {Schmidt, Uwe and Roth, Stefan},
  title     = {Shrinkage fields for effective image restoration},
  booktitle = {Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition},
  pages     = {2774--2781},
  date      = {2014}
}

@inproceedings{schneider1997exploiting,
  author    = {Schneider, Jeff G},
  title     = {Exploiting model uncertainty estimates for safe dynamic control learning},
  booktitle = {Advances in Neural Information Processing Systems},
  pages     = {1047--1053},
  date      = {1997}
}

@inproceedings{schroff2015facenet,
  author    = {Schroff, Florian and Kalenichenko, Dmitry and Philbin, James},
  title     = {{FaceNet}: A unified embedding for face recognition and clustering},
  booktitle = {Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition},
  pages     = {815--823},
  date      = {2015}
}

@inproceedings{schulman2015trust,
  author    = {Schulman, John and Levine, Sergey and Abbeel, Pieter and Jordan, Michael and Moritz, Philipp},
  title     = {Trust region policy optimization},
  booktitle = {Proceedings of the 32nd International Conference on Machine Learning (ICML-15)},
  pages     = {1889--1897},
  date      = {2015}
}

@inproceedings{shah2018airsim,
  author       = {Shah, Shital and Dey, Debadeepta and Lovett, Chris and Kapoor, Ashish},
  title        = {{AirSim}: High-fidelity visual and physical simulation for autonomous vehicles},
  booktitle    = {Field and Service Robotics},
  pages        = {621--635},
  organization = {Springer},
  date         = {2018}
}

@inproceedings{silver2014deterministic,
  author    = {Silver, David and Lever, Guy and Heess, Nicolas and Degris, Thomas and Wierstra, Daan and Riedmiller, Martin},
  title     = {Deterministic Policy Gradient Algorithms},
  booktitle = {Proceedings of the International Conference on Machine Learning},
  pages     = {387--395},
  date      = {2014}
}

% The former organization value named a citation index (Citeseer), not a
% sponsor or publisher of the proceedings, so the field is dropped.
@inproceedings{simard1991reverse,
  author    = {Simard, Patrice and LeCun, Yann},
  title     = {Reverse {TDNN}: an architecture for trajectory generation},
  booktitle = {Advances in Neural Information Processing Systems},
  pages     = {579--588},
  date      = {1991}
}

% NOTE(review): second author's surname corrected from the export's "Sjanssen"
% (which looks like a login name) to "Janssen" -- confirm against the paper.
@inproceedings{stewart2007xmonad,
  author       = {Stewart, Don and Janssen, Spencer},
  title        = {{xmonad}},
  booktitle    = {Proceedings of the ACM SIGPLAN Workshop on Haskell},
  pages        = {119},
  organization = {ACM},
  date         = {2007}
}

@inproceedings{stoyanov2011empirical,
  author    = {Stoyanov, Veselin and Ropson, Alexander and Eisner, Jason},
  title     = {Empirical Risk Minimization of Graphical Model Parameters Given Approximate Inference, Decoding, and Model Structure},
  booktitle = {AISTATS},
  pages     = {725--733},
  date      = {2011}
}

% NOTE(review): full author list from the published paper replaces the
% truncated "and others" export.
@inproceedings{sukhbaatar2015end,
  author    = {Sukhbaatar, Sainbayar and Szlam, Arthur and Weston, Jason and Fergus, Rob},
  title     = {End-to-end memory networks},
  booktitle = {Advances in Neural Information Processing Systems},
  pages     = {2440--2448},
  date      = {2015}
}

@inproceedings{sutton1990integrated,
  author    = {Sutton, Richard S},
  title     = {Integrated architectures for learning, planning, and reacting based on approximating dynamic programming},
  booktitle = {Proceedings of the Seventh International Conference on Machine Learning},
  pages     = {216--224},
  date      = {1990}
}

@inproceedings{szegedy2015going,
  author    = {Szegedy, Christian and Liu, Wei and Jia, Yangqing and Sermanet, Pierre and Reed, Scott and Anguelov, Dragomir and Erhan, Dumitru and Vanhoucke, Vincent and Rabinovich, Andrew},
  title     = {Going deeper with convolutions},
  booktitle = {Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition},
  pages     = {1--9},
  date      = {2015}
}

@inproceedings{taigman2014deepface,
  author    = {Taigman, Yaniv and Yang, Ming and Ranzato, Marc'Aurelio and Wolf, Lior},
  title     = {{DeepFace}: Closing the gap to human-level performance in face verification},
  booktitle = {Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition},
  pages     = {1701--1708},
  date      = {2014}
}

@inproceedings{tamar2016value,
  author    = {Tamar, Aviv and Wu, Yi and Thomas, Garrett and Levine, Sergey and Abbeel, Pieter},
  title     = {Value iteration networks},
  booktitle = {Advances in Neural Information Processing Systems},
  pages     = {2154--2162},
  date      = {2016}
}

@inproceedings{tamar2017learning,
  author       = {Tamar, Aviv and Thomas, Garrett and Zhang, Tianhao and Levine, Sergey and Abbeel, Pieter},
  title        = {Learning from the hindsight plan---Episodic {MPC} improvement},
  booktitle    = {2017 IEEE International Conference on Robotics and Automation (ICRA)},
  pages        = {336--343},
  organization = {IEEE},
  date         = {2017}
}

@inproceedings{tappen2007learning,
  author       = {Tappen, Marshall F and Liu, Ce and Adelson, Edward H and Freeman, William T},
  title        = {Learning {Gaussian} conditional random fields for low-level vision},
  booktitle    = {2007 IEEE Conference on Computer Vision and Pattern Recognition (CVPR)},
  pages        = {1--8},
  organization = {IEEE},
  date         = {2007}
}

@inproceedings{taskar2004max,
  author    = {Taskar, Ben and Guestrin, Carlos and Koller, Daphne},
  title     = {Max-margin {Markov} networks},
  booktitle = {Advances in Neural Information Processing Systems},
  pages     = {25--32},
  date      = {2004}
}

@inproceedings{taskar2005learning,
  author       = {Taskar, Ben and Chatalbashev, Vassil and Koller, Daphne and Guestrin, Carlos},
  title        = {Learning structured prediction models: A large margin approach},
  booktitle    = {Proceedings of the 22nd International Conference on Machine Learning},
  pages        = {896--903},
  organization = {ACM},
  date         = {2005}
}

@inproceedings{tassa2014control,
  author       = {Tassa, Yuval and Mansard, Nicolas and Todorov, Emo},
  title        = {Control-limited differential dynamic programming},
  booktitle    = {2014 IEEE International Conference on Robotics and Automation (ICRA)},
  pages        = {1168--1175},
  organization = {IEEE},
  date         = {2014}
}

@inproceedings{todorov2012mujoco,
  author       = {Todorov, Emanuel and Erez, Tom and Tassa, Yuval},
  title        = {{MuJoCo}: A physics engine for model-based control},
  booktitle    = {2012 IEEE/RSJ International Conference on Intelligent Robots and Systems},
  pages        = {5026--5033},
  organization = {IEEE},
  date         = {2012}
}

@inproceedings{ulyanov2018deep,
  author    = {Ulyanov, Dmitry and Vedaldi, Andrea and Lempitsky, Victor},
  title     = {Deep image prior},
  booktitle = {Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition},
  pages     = {9446--9454},
  date      = {2018}
}

@inproceedings{vaswani2017attention,
  author    = {Vaswani, Ashish and Shazeer, Noam and Parmar, Niki and Uszkoreit, Jakob and Jones, Llion and Gomez, Aidan N and Kaiser, {\L{}}ukasz and Polosukhin, Illia},
  title     = {Attention is all you need},
  booktitle = {Advances in Neural Information Processing Systems},
  pages     = {5998--6008},
  date      = {2017}
}

@inproceedings{venkatraman2016improved,
  author       = {Venkatraman, Arun and Capobianco, Roberto and Pinto, Lerrel and Hebert, Martial and Nardi, Daniele and Bagnell, J Andrew},
  title        = {Improved learning of dynamics models for control},
  booktitle    = {International Symposium on Experimental Robotics},
  pages        = {703--713},
  organization = {Springer},
  date         = {2016}
}

@inproceedings{wang2016proximal,
  author    = {Wang, Shenlong and Fidler, Sanja and Urtasun, Raquel},
  title     = {Proximal deep structured models},
  booktitle = {Advances in Neural Information Processing Systems},
  pages     = {865--873},
  date      = {2016}
}

@inproceedings{wang2017scalable,
  author       = {Wang, Junjue and Amos, Brandon and Das, Anupam and Pillai, Padmanabhan and Sadeh, Norman and Satyanarayanan, Mahadev},
  title        = {A Scalable and Privacy-Aware {IoT} Service for Live Video Analytics},
  booktitle    = {Proceedings of the 8th ACM on Multimedia Systems Conference},
  organization = {ACM},
  date         = {2017},
  pages        = {38--49}
}

@inproceedings{wang2018non,
  author    = {Wang, Xiaolong and Girshick, Ross and Gupta, Abhinav and He, Kaiming},
  title     = {Non-local neural networks},
  booktitle = {Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition},
  pages     = {7794--7803},
  date      = {2018}
}

@inproceedings{watter2015embed,
  author    = {Watter, Manuel and Springenberg, Jost and Boedecker, Joschka and Riedmiller, Martin},
  title     = {Embed to control: A locally linear latent dynamics model for control from raw images},
  booktitle = {Advances in Neural Information Processing Systems},
  date      = {2015},
  pages     = {2746--2754}
}

@inproceedings{williams2016aggressive,
  author       = {Williams, Grady and Drews, Paul and Goldfain, Brian and Rehg, James M and Theodorou, Evangelos A},
  title        = {Aggressive driving with model predictive path integral control},
  booktitle    = {IEEE International Conference on Robotics and Automation (ICRA)},
  organization = {IEEE},
  date         = {2016},
  pages        = {1433--1440}
}

@inproceedings{woo2018linknet,
  author    = {Woo, Sanghyun and Kim, Dahun and Cho, Donghyeon and Kweon, In So},
  title     = {{LinkNet}: Relational Embedding for Scene Graph},
  booktitle = {Advances in Neural Information Processing Systems},
  date      = {2018},
  pages     = {558--568}
}

@inproceedings{xie2017ddp,
  author    = {Xie, Zhaoming and Liu, C. Karen and Hauser, Kris},
  title     = {Differential Dynamic Programming with Nonlinear Constraints},
  booktitle = {International Conference on Robotics and Automation (ICRA)},
  date      = {2017}
}

@inproceedings{xiong2016dynamic,
  author    = {Xiong, Caiming and Merity, Stephen and Socher, Richard},
  title     = {Dynamic memory networks for visual and textual question answering},
  booktitle = {International Conference on Machine Learning},
  date      = {2016},
  pages     = {2397--2406}
}

@inproceedings{xu2017scene,
  author    = {Xu, Danfei and Zhu, Yuke and Choy, Christopher B and Fei-Fei, Li},
  title     = {Scene graph generation by iterative message passing},
  booktitle = {Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition},
  date      = {2017}
}

@inproceedings{zaheer2017deep,
  author    = {Zaheer, Manzil and Kottur, Satwik and Ravanbakhsh, Siamak and P{\'o}czos, Barnab{\'a}s and Salakhutdinov, Ruslan R and Smola, Alexander J},
  title     = {Deep sets},
  booktitle = {Advances in Neural Information Processing Systems},
  date      = {2017},
  pages     = {3391--3401}
}

@inproceedings{zeiler2014visualizing,
  author    = {Zeiler, Matthew D and Fergus, Rob},
  title     = {Visualizing and understanding convolutional networks},
  booktitle = {European Conference on Computer Vision},
  publisher = {Springer},
  date      = {2014},
  pages     = {818--833}
}

@inproceedings{zellers2018neural,
  author    = {Zellers, Rowan and Yatskar, Mark and Thomson, Sam and Choi, Yejin},
  title     = {Neural Motifs: Scene Graph Parsing with Global Context},
  booktitle = {Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition},
  pages     = {5831--5840},
  date      = {2018}
}

@inproceedings{zhao2016collapsed,
  abstract  = {Sum-Product Networks (SPNs) are probabilistic inference machines that admit exact inference in linear time in the size of the network. Existing parameter learning approaches for SPNs are largely based on the maximum likelihood principle and hence are subject to overfitting compared to more Bayesian approaches. Exact Bayesian posterior inference for SPNs is computationally intractable. Both standard variational inference and posterior sampling for SPNs are computationally infeasible even for networks of moderate size due to the large number of local latent variables per instance. In this work, we propose a novel deterministic collapsed variational inference algorithm for SPNs that is computationally efficient, easy to implement and at the same time allows us to incorporate prior information into the optimization formulation. Extensive experiments show a significant improvement in accuracy compared with a maximum likelihood based approach.},
  author    = {Zhao, Han and Adel, Tameem and Gordon, Geoff and Amos, Brandon},
  booktitle = {ICML},
  date      = {2016},
  title     = {Collapsed Variational Inference for Sum-Product Networks}
}

@inproceedings{zheng2015conditional,
  author    = {Zheng, Shuai and Jayasumana, Sadeep and Romera-Paredes, Bernardino and Vineet, Vibhav and Su, Zhizhong and Du, Dalong and Huang, Chang and Torr, Philip H. S.},
  title     = {Conditional random fields as recurrent neural networks},
  booktitle = {Proceedings of the IEEE International Conference on Computer Vision},
  date      = {2015},
  pages     = {1529--1537}
}

@inproceedings{zhu2016generative,
  author    = {Zhu, Jun-Yan and Kr{\"{a}}henb{\"{u}}hl, Philipp and Shechtman, Eli and Efros, Alexei A.},
  title     = {Generative Visual Manipulation on the Natural Image Manifold},
  booktitle = {Proceedings of European Conference on Computer Vision (ECCV)},
  date      = {2016}
}

@misc{boyd2008lqr,
  author       = {Boyd, Stephen},
  title        = {{LQR} via {Lagrange} multipliers},
  howpublished = {Stanford EE 363: Linear Dynamical Systems},
  date         = {2008}
}

@misc{lecun1998mnist,
  author = {LeCun, Yann and Cortes, Corinna and Burges, Christopher J. C.},
  title  = {The {MNIST} database of handwritten digits},
  date   = {1998},
  url    = {http://yann.lecun.com/exdb/mnist/}
}

@misc{levine2017introduction,
  author       = {Levine, Sergey},
  title        = {Introduction to Reinforcement Learning},
  howpublished = {Berkeley CS 294-112: Deep Reinforcement Learning},
  date         = {2017}
}

@misc{levine2017optimal,
  author       = {Levine, Sergey},
  title        = {Optimal Control and Planning},
  howpublished = {Berkeley CS 294-112: Deep Reinforcement Learning},
  date         = {2017}
}

@report{amos2016openface,
  abstract    = {Cameras are becoming ubiquitous in the Internet of Things (IoT) and can use face recognition technology to improve context. There is a large accuracy gap between today's publicly available face recognition systems and the state-of-the-art private face recognition systems. This paper presents our OpenFace face recognition library that bridges this accuracy gap. We show that OpenFace provides near-human accuracy on the LFW benchmark and present a new classification benchmark for mobile scenarios. This paper is intended for non-experts interested in using OpenFace and provides a light introduction to the deep neural network techniques we use. We released OpenFace in October 2015 as an open source library under the Apache 2.0 license. It is available at: <http://cmusatyalab.github.io/openface/>},
  author      = {Amos, Brandon and Ludwiczuk, Bartosz and Satyanarayanan, Mahadev},
  institution = {CMU School of Computer Science},
  number      = {CMU-CS-16-118},
  date        = {2016},
  title       = {{OpenFace}: A general-purpose face recognition library with mobile applications},
  type        = {techreport}
}

@report{gao2015cloudlets,
  abstract    = {We present experimental results from Wi-Fi and 4G LTE networks to validate the intuition that low end-to-end latency of cloud services improves application response time and reduces energy consumption on mobile devices. We focus specifically on computational offloading as a cloud service. Using a wide range of applications, and exploring both pre-partitioned and dynamically partitioned approaches, we demonstrate the importance of low latency for cloud offload services. We show the best performance is achieved by offloading to cloudlets, which are small-scale edge-located data centers. Our results show that cloudlets can improve response times 51\% and reduce energy consumption in a mobile device by up to 42\% compared to cloud offload.},
  author      = {Gao, Ying and Hu, Wenlu and Ha, Kiryong and Amos, Brandon and Pillai, Padmanabhan and Satyanarayanan, Mahadev},
  institution = {CMU School of Computer Science},
  number      = {CMU-CS-15-139},
  date        = {2015},
  title       = {Are Cloudlets Necessary?},
  type        = {techreport}
}

@report{ha2015adaptive,
  abstract    = {Cloudlet offload is a valuable technique for ensuring low end-to-end latency of resource-intensive cloud processing for many emerging mobile applications. This paper examines the impact of user mobility on cloudlet offload, and shows that even modest user mobility can result in significant network degradation. We propose VM handoff as a technique for seamlessly transferring VM-encapsulated execution to a more optimal offload site as users move. Our approach can perform handoff in roughly a minute even over limited WANs by adaptively reducing data transferred. We present experimental results to validate our implementation and to demonstrate effectiveness of adaptation to changing network conditions and processing capacity},
  author      = {Ha, Kiryong and Abe, Yoshihisa and Chen, Zhuo and Hu, Wenlu and Amos, Brandon and Pillai, Padmanabhan and Satyanarayanan, Mahadev},
  institution = {CMU School of Computer Science},
  number      = {CMU-CS-15-113},
  date        = {2015},
  title       = {Adaptive {VM} handoff across cloudlets},
  type        = {techreport}
}

@report{krizhevsky2009learning,
  author      = {Krizhevsky, Alex and Hinton, Geoffrey},
  title       = {Learning multiple layers of features from tiny images},
  type        = {techreport},
  institution = {University of Toronto},
  date        = {2009}
}

@report{lin1993reinforcement,
  author      = {Lin, Long-Ji},
  title       = {Reinforcement learning for robots using neural networks},
  type        = {techreport},
  institution = {DTIC Document},
  date        = {1993}
}

@thesis{belanger2017deep,
  author      = {Belanger, David},
  title       = {Deep Energy-Based Models for Structured Prediction},
  type        = {phdthesis},
  institution = {University of Massachusetts Amherst},
  date        = {2017}
}

@thesis{jordan2015convex,
  author      = {Jordan-Squire, Christopher},
  title       = {Convex Optimization over Probability Measures},
  type        = {phdthesis},
  institution = {University of Washington},
  date        = {2015}
}

@thesis{watkins1989learning,
  author      = {Watkins, Christopher John Cornish Hellaby},
  title       = {Learning from delayed rewards},
  type        = {phdthesis},
  institution = {University of Cambridge},
  location    = {Cambridge, England},
  date        = {1989}
}

@book{brent1973algorithms,
  author               = {Brent, Richard P.},
  title                = {Algorithms for Minimization without Derivatives},
  edition              = {1st},
  publisher            = {Prentice-Hall},
  address              = {Englewood Cliffs, New Jersey},
  year                 = 1973,
  added-at             = {2019-03-11T21:00:05.000+0100},
  biburl               = {https://www.bibsonomy.org/bibtex/22b836ac21011aef44906c3ee6ff28201/fairybasslet},
  citeulike-article-id = {12635009},
  interhash            = {4c0d2a19c5c5b2136c03a82e05bae0b9},
  intrahash            = {2b836ac21011aef44906c3ee6ff28201},
  timestamp            = {2019-03-11T21:06:37.000+0100}
}