-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathtemplate.tex
228 lines (176 loc) · 11.7 KB
/
template.tex
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
\documentclass{article}
\usepackage{amsmath}
\usepackage{amssymb}
\usepackage{hyperref}
\usepackage{tabu}
\usepackage{graphicx}
\usepackage{placeins}
\usepackage[margin=1in]{geometry}
\usepackage{float}
\usepackage{mathtools}
\usepackage{textcomp}
\usepackage{pdfpages}
\usepackage{minted}
\usepackage[style=iso]{datetime2}
\usepackage{bbm}
\usepackage[flushleft]{threeparttable}
\usepackage{tikz}
\usetikzlibrary{bayesnet}
\usepackage{braket}
\usepackage{cancel}
\usepackage{enumitem}
\usepackage{subcaption}
\usepackage{wrapfig}
\usepackage{titling}
\usepackage[font=small,labelfont=bf]{caption}
\usepackage{subcaption}
\usepackage{syntax}
\usepackage{listings}
\include{pddl}
\lstset{
language=PDDL,
escapeinside={(*}{*)},
}
\DeclareFontShape{OT1}{cmtt}{bx}{n}{<5><6><7><8><9><10><10.95><12><14.4><17.28><20.74><24.88>cmttb10}{}
\usepackage[american]{babel}
\usepackage{csquotes}
% \usepackage[style=apa,backend=biber]{biblatex}
% \DeclareLanguageMapping{american}{american-apa}
% \bibliography{references} % .bib file
% \nocite{*}
\setlength{\droptitle}{-8em}
% \bibliographystyle{apacite}
\DeclareMathOperator*{\argmin}{arg\,min}
\DeclareMathOperator*{\argmax}{arg\,max}
\DeclareMathOperator*{\EV}{E}
\DeclareMathOperator*{\var}{var}
\DeclareMathOperator*{\tr}{tr}
\DeclareMathOperator*{\mathspan}{span}
\newcommand{\MAT}[1]{\begin{bmatrix} #1 \end{bmatrix}}
\newcommand{\sMAT}[1]{\left(\begin{smallmatrix} #1 \end{smallmatrix}\right)}
\DeclareMathOperator*{\vecop}{vec}
\newcommand{\norm}[1]{\left\Vert #1 \right\Vert}
\newcommand{\fnorm}[1]{\left\Vert #1 \right\Vert_F}
\newcommand{\indep}{\perp \!\!\! \perp}
\newcommand{\sol}{{\bf Solution: }}
\newcommand{\soln}{\sol}
\newcommand{\solution}{\sol}
\newcommand{\TODO}{{\bf \color{red} TODO: THIS}}
% \renewcommand{\dateseparator}{--}
\newcommand{\figref}[1]{Figure~\ref{#1}}
\pagestyle{empty} \addtolength{\textwidth}{1.0in}
\addtolength{\textheight}{0.5in} \addtolength{\oddsidemargin}{-0.5in}
\addtolength{\evensidemargin}{-0.5in}
\newcommand{\ruleskip}{\bigskip\hrule\bigskip}
\newcommand{\nodify}[1]{{\sc #1}} \newcommand{\points}[1]{{\textbf{[#1
points]}}}
\newcommand{\bitem}{\begin{list}{$\bullet$}%
{\setlength{\itemsep}{0pt}\setlength{\topsep}{0pt}%
\setlength{\rightmargin}{0pt}}} \newcommand{\eitem}{\end{list}}
\newcommand{\G}{\mathcal{G}}
\newcommand{\E}{\mathbb{E}}
\newcommand{\R}{\mathbb{R}}
\newcommand{\LL}{\mathcal{L}}
%\newcommand{\bE}{\mbox{\boldmath $E$}}
%\newcommand{\be}{\mbox{\boldmath $e$}}
%\newcommand{\bU}{\mbox{\boldmath $U$}}
%\newcommand{\bu}{\mbox{\boldmath $u$}}
%\newcommand{\bQ}{\mbox{\boldmath $Q$}}
%\newcommand{\bq}{\mbox{\boldmath $q$}}
%\newcommand{\bX}{\mbox{\boldmath $X$}}
%\newcommand{\bY}{\mbox{\boldmath $Y$}}
%\newcommand{\bZ}{\mbox{\boldmath $Z$}}
%\newcommand{\bx}{\mbox{\boldmath $x$}}
%\newcommand{\by}{\mbox{\boldmath $y$}}
%\newcommand{\bz}{\mbox{\boldmath $z$}}
\newcommand{\true}{\mbox{true}}
\newcommand{\Parents}{\mbox{Parents}}
\newcommand{\ww}{{\bf w}}
\newcommand{\xx}{{\bf x}}
\newcommand{\yy}{{\bf y}}
\newcommand{\real}{\ensuremath{\mathbb{R}}}
\newcommand{\dsl}[1]{{\it $\langle$#1$\rangle$}}
\newcommand*{\fullref}[1]{\hyperref[{#1}]{\autoref*{#1} (\nameref*{#1})}} % One single link
\newcommand{\eat}[1]{}
\newcommand{\CInd}[3]{({#1} \perp {#2} \mid {#3})}
\newcommand{\Ind}[2]{({#1} \perp {#2})}
\setlength{\parindent}{0pt} \setlength{\parskip}{0.5ex}
% \title{Homework 12} % \vspace{-3cm} % before name to raise
% \subtitle{MATH-GA 2110}
% \vspace{-5cm} % before name to raise
\title{Goals as Reward-Generating Programs Domain Specific Language}
% \author{Guy Davidson}
% \date{\today}
\begin{document}
\maketitle
{{BODY}}
\section{Modal Definitions in Linear Temporal Logic}
\label{sec:LTL}
\subsection{Linear Temporal Logic definitions}
We offer a mapping between the temporal sequence functions defined in \fullref{sec:constraints} and linear temporal logic (LTL) operators.
As we were creating this DSL, we found that the syntax of the \dsl{then} operator felt more convenient than directly writing down LTL, but we hope the mapping helps reason about how we see our temporal operators functioning.
LTL offers the following operators, using $\varphi$ and $\psi$ as the symbols (in our case, predicates).
Assume the following formulas operate sequence of states $S_0, S_1, \cdots, S_n$:
\begin{itemize}
\item \textbf{Next}, $X \psi$: at the next timestep, $\psi$ will be true. If we are at timestep $i$, then $S_{i+1} \vdash \psi$
\item \textbf{Finally}, $F \psi$: at some future timestep, $\psi$ will be true. If we are at timestep $i$, then $\exists j > i: S_{j} \vdash \psi$
\item \textbf{Globally}, $G \psi$: from this timestep on, $\psi$ will be true. If we are at timestep $i$, then $\forall j: j \geq i: S_{j} \vdash \psi$
\item \textbf{Until}, $\psi U \varphi$: $\psi$ will be true from the current timestep until a timestep at which $\varphi$ is true. If we are at timestep $i$, then $\exists j > i: \forall k: i \leq k < j: S_k \vdash \psi$, and $S_j \vdash \varphi$.
\item \textbf{Strong release}, $\psi M \varphi$: the same as until, but demanding that both $\psi$ and $\varphi$ are true simultaneously: If we are at timestep $i$, then $\exists j > i: \forall k: i \leq k \leq j: S_k \vdash \psi$, and $S_j \vdash \varphi$.
\textit{Aside:} there's also a \textbf{weak until}, $\psi W \varphi$, which allows for the case where the second is never true, in which case the first must hold for the rest of the sequence. Formally, if we are at timestep $i$, \textit{if} $\exists j > i: \forall k: i \leq k < j: S_k \vdash \psi$, and $S_j \vdash \varphi$, and otherwise, $\forall k \geq i: S_k \vdash \psi$. Similarly there's \textbf{release}, which is the similar variant of strong release. We're leaving those two as an aside since we don't know we'll need them.
\end{itemize}
\subsection{Satisfying a \dsl{then} operator}
Formally, to satisfy a preference using a \dsl{then} operator, we're looking to find a sub-sequence of $S_0, S_1, \cdots, S_n$ that satisfies the formula we translate to.
We translate a \dsl{then} operator by translating the constituent sequence-functions (\dsl{once}, \dsl{hold}, \dsl{while-hold})\footnote{These are the ones we've used so far in the interactive experiment dataset, even if we previously defined other ones, too.} to LTL.
Since the translation of each individual sequence function leaves the last operand empty, we append a `true' ($\top$) as the final operand, since we don't care what happens in the state after the sequence is complete.
(once $\psi$) := $\psi X \cdots$
(hold $\psi$) := $\psi U \cdots$
(hold-while $\psi$ $\alpha$ $\beta$ $\cdots \nu$) := ($\psi M \alpha) X (\psi M \beta) X \cdots X (\psi M \nu) X \psi U \cdots$ where the last $\psi U \cdots$ allows for additional states satisfying $\psi$ until the next modal is satisfied.
For example, a sequence such as the following, which signifies a throw attempt:
\begin{lstlisting}
(then
(once (agent_holds ?b))
(hold (and (not (agent_holds ?b)) (in_motion ?b)))
(once (not (in_motion ?b)))
)
\end{lstlisting}
Can be translated to LTL using $\psi:=$ (agent\_holds ?b), $\varphi:=$ (in\_motion ?b) as:
$\psi X (\neg \psi \wedge \varphi) U (\neg \varphi) X \top $
Here's another example:
\begin{lstlisting}
(then
(once (agent_holds ?b)) (* \color{blue} $\alpha$*)
(hold-while
(and (not (agent_holds ?b)) (in_motion ?b)) (* \color{blue} $\beta$ *)
(touch ?b ?r) (* \color{blue} $\gamma$*)
)
(once (and (in ?h ?b) (not (in_motion ?b)))) (* \color{blue} $\delta$*)
)
\end{lstlisting}
If we translate each predicate to the letter appearing in blue at the end of the line, this translates to:
$\alpha X (\beta M \gamma) X \beta U \delta X \top$
% \subsection{Satisfying (at-end ...) operators}
% Thankfully, the other type of temporal specification we find ourselves using as part of preferences is much simpler to translate.
% Satisfying an (at-end ...) operator does not require any temporal logic, since the predicate it operates over is evaluated at the terminal state of gameplay.
% The (always ...) operator is equivalent to the LTL globally operator: (always $\psi$) := $G \psi$, with the added constraint that we begin at the first timestep of gameplay.
% I'll attempt to check slightly more formally at some point, but I don't think we end up with many structures that are more complex than this.
% The predicate end up being rather more complex, but that doesn't matter to the LTL translation.
% \section{Modal Definitions}
% \begin{itemize}
% \item These definitions attempt to offer precision on how the (then ...) operator works. It receives a series of sequence-functions (once, hold, etc.), each of which is parameterized by one or more predicate conditions.
% \item For the inner sequence-functions, I used the parentheses notation to mean "evaluated at these timesteps" -- does this notation make sense? Should I also use it for the entire then-expression?
% \item I've only provided here the for the ones currently used in the interactive experiment.
% \end{itemize}
% $(\text{then}\ \langle SF_1 \rangle \ \langle SF_2 \rangle \cdots \langle SF_n \rangle) := \exists t_0 \leq t_1 < t_2 < \cdots < t_n$ such that $SF_1(t_0, t_1) \land SF_2(t_1, t_2) \land \cdots \land SF_n(t_{n-1}, t_n) = \text{true}$, that is, each seq-func evaluated at these timesteps evaluates to true.
% $(\text{once}\ \langle C \rangle)(t_{i-1}, t_i) := t_i = t_{i-1} + 1, S[t_i] \vdash C$, that is, the condition C holds at the next timestep from the previous assigned timestep.
% $(\text{hold}\ \langle C \rangle)(t_{i-1}, t_i) := \forall t: t_{i-1} < t \leq t_i, S[t] \vdash C$, that is, the condition holds for all timesteps starting immediately after the previous timestep and until the current timestep.
% $(\text{hold-while}\ \langle C \rangle \ \langle C_a \rangle \cdots \langle C_m \rangle)(t_{i-1}, t_i) := \forall t: t_{i-1} < t \leq t_i, S[t] \vdash C$ and $\exists t_a, \cdots, t_m: t_{i-1} < t_a < \cdots < t_m < t_i$ such that $S[t_a] \vdash C_a, \cdots, S[t_m] \vdash C_m$, that is, the same as hold happens, and while this condition $C$ holds, there exist non-overlapping states in sequence where each of the additional conditions provided hold for at least a single state.
% $(\text{hold-for}\ \langle n \rangle \ \langle C \rangle)(t_{i-1}, t_i) := t_i \geq t_{i-1} + n, \forall t: t_{i-1} < t \leq t_i, S[t] \vdash C$, that is, the same as the standard hold but for at least $n$ timesteps.
% $(\text{forall-sequence}\ \langle \text{forall-quantifier(s)} \rangle \ \langle \text{then-expr} \rangle)(t_{i-1}, t_i): \forall o \in \{a, b, \cdots, k\}$ satisfying the object assignments in the forall quantifier, $\exists t_0^o, t_1^o, \cdots, t_m^o$ that satisfy the inner then expression, such that $t_{i-1} < t_0^a < \cdots t_m^a < t_0^b < \cdots t_m^b < \cdots < t_0^k < \cdots t_m^k < t_i$, that is, the series of timesteps satisfying the inner then-expression for each object assignment do not overlap, happen in sequence, and fall between the previous assigned timestep and the current assigned timestep.
% \section{Open Questions}
% \begin{itemize}
% \item Do we want to define syntax to quantify streaks? Some participants will use language like ``every three successful scores in a row get you a point''. An alternative to defining syntax or sequences would be to define the preference to count three successful attempts in a row, but that might be more awkward?
% \item How do we want to work with type hierarchy, such as block or ball being the super-types for all blocks or balls -- is it an implicit (either ...) over all of the sub-types? Or do we want to provide the hierarchy in some way to the model, perhaps as part of the enumeration of all valid types in a given environment/scene?
% \item (I'm sure there are more open questions -- will add later)
% \end{itemize}
\end{document}