@comment{
  references.bib — BibTeX database for the project.
  (Removed GitHub page-scrape residue: navigation text and gutter line numbers
  that were not part of the original file.)
}
@inproceedings{Schulman2015,
  author        = {Schulman, John and Heess, Nicolas and Weber, Theophane and Abbeel, Pieter},
  title         = {Gradient Estimation Using Stochastic Computation Graphs},
  booktitle     = {Advances in Neural Information Processing Systems},
  year          = {2015},
  issn          = {1049-5258},
  eprint        = {1506.05254},
  archivePrefix = {arXiv},
  abstract      = {In a variety of problems originating in supervised, unsupervised, and reinforcement learning, the loss function is defined by an expectation over a collection of random variables, which might be part of a probabilistic model or the external world. Estimating the gradient of this loss function, using samples, lies at the core of gradient-based learning algorithms for these problems. We introduce the formalism of stochastic computation graphs-directed acyclic graphs that include both deterministic functions and conditional probability distributions- and describe how to easily and automatically derive an unbiased estimator of the loss function's gradient. The resulting algorithm for computing the gradient estimator is a simple modification of the standard backpropagation algorithm. The generic scheme we propose unifies estimators derived in variety of prior work, along with variance-reduction techniques therein. It could assist researchers in developing intricate models involving a combination of stochastic and deterministic operations, enabling, for example, attention, memory, and control actions.}
}
@article{mohamed2019monte,
  author        = {Mohamed, Shakir and Rosca, Mihaela and Figurnov, Michael and Mnih, Andriy},
  title         = {{Monte Carlo} Gradient Estimation in Machine Learning},
  journal       = {arXiv preprint arXiv:1906.10652},
  year          = {2019},
  eprint        = {1906.10652},
  archivePrefix = {arXiv}
}
@book{glasserman1991gradient,
  author    = {Glasserman, Paul and Ho, Yu-Chi},
  title     = {Gradient estimation via perturbation analysis},
  publisher = {Springer Science \& Business Media},
  volume    = {116},
  year      = {1991},
}
@article{kingma2013auto,
  author        = {Kingma, Diederik P and Welling, Max},
  title         = {Auto-Encoding Variational {Bayes}},
  journal       = {arXiv preprint arXiv:1312.6114},
  year          = {2013},
  eprint        = {1312.6114},
  archivePrefix = {arXiv}
}
@article{jang2016categorical,
  author        = {Jang, Eric and Gu, Shixiang and Poole, Ben},
  title         = {Categorical Reparameterization with {Gumbel-Softmax}},
  journal       = {arXiv preprint arXiv:1611.01144},
  year          = {2016},
  eprint        = {1611.01144},
  archivePrefix = {arXiv}
}
@article{maddison2016concrete,
  author        = {Maddison, Chris J and Mnih, Andriy and Teh, Yee Whye},
  title         = {The {Concrete} Distribution: A Continuous Relaxation of Discrete Random Variables},
  journal       = {arXiv preprint arXiv:1611.00712},
  year          = {2016},
  eprint        = {1611.00712},
  archivePrefix = {arXiv}
}
@article{weber2019credit,
  author  = {Weber, Th{\'e}ophane and Heess, Nicolas and Buesing, Lars and Silver, David},
  title   = {Credit assignment techniques in stochastic computation graphs},
  journal = {arXiv preprint arXiv:1901.01761},
  year    = {2019},
}
@article{haarnoja2018soft,
  author  = {Haarnoja, Tuomas and Zhou, Aurick and Abbeel, Pieter and Levine, Sergey},
  title   = {Soft actor-critic: Off-policy maximum entropy deep reinforcement learning with a stochastic actor},
  journal = {arXiv preprint arXiv:1801.01290},
  year    = {2018},
}
@article{lillicrap2015continuous,
  author  = {Lillicrap, Timothy P and Hunt, Jonathan J and Pritzel, Alexander and Heess, Nicolas and Erez, Tom and Tassa, Yuval and Silver, David and Wierstra, Daan},
  title   = {Continuous control with deep reinforcement learning},
  journal = {arXiv preprint arXiv:1509.02971},
  year    = {2015},
}
@misc{lewis2020retrievalaugmented,
  author        = {Lewis, Patrick and Perez, Ethan and Piktus, Aleksandra and Petroni, Fabio and Karpukhin, Vladimir and Goyal, Naman and K{\"u}ttler, Heinrich and Lewis, Mike and Yih, Wen-tau and Rockt{\"a}schel, Tim and Riedel, Sebastian and Kiela, Douwe},
  title         = {Retrieval-Augmented Generation for Knowledge-Intensive {NLP} Tasks},
  year          = {2020},
  eprint        = {2005.11401},
  archivePrefix = {arXiv},
  primaryClass  = {cs.CL}
}
@article{bunel2018leveraging,
  author  = {Bunel, Rudy and Hausknecht, Matthew and Devlin, Jacob and Singh, Rishabh and Kohli, Pushmeet},
  title   = {Leveraging grammar and reinforcement learning for neural program synthesis},
  journal = {arXiv preprint arXiv:1805.04276},
  year    = {2018},
}
@inproceedings{liang2018memory,
  author    = {Liang, Chen and Norouzi, Mohammad and Berant, Jonathan and Le, Quoc V and Lao, Ni},
  title     = {Memory augmented policy optimization for program synthesis and semantic parsing},
  booktitle = {Advances in Neural Information Processing Systems},
  year      = {2018},
  pages     = {9994--10006},
}
@inproceedings{deng2018latent,
  author    = {Deng, Yuntian and Kim, Yoon and Chiu, Justin and Guo, Demi and Rush, Alexander},
  title     = {Latent alignment and variational attention},
  booktitle = {Advances in Neural Information Processing Systems},
  year      = {2018},
  pages     = {9712--9724},
}
@article{doersch2016tutorial,
  author  = {Doersch, Carl},
  title   = {Tutorial on variational autoencoders},
  journal = {arXiv preprint arXiv:1606.05908},
  year    = {2016},
}