latex/template.tex

\documentclass{article}

\usepackage{amsmath}
\usepackage{amssymb}
\usepackage{hyperref}
\usepackage{tabu}
\usepackage{graphicx}
\usepackage{placeins}
\usepackage[margin=1in]{geometry}
\usepackage{float}
\usepackage{mathtools}
\usepackage{textcomp}
\usepackage{pdfpages}
\usepackage{minted}
\usepackage[style=iso]{datetime2}
\usepackage{bbm}
\usepackage[flushleft]{threeparttable}
\usepackage{tikz}
\usetikzlibrary{bayesnet}
\usepackage{braket}
\usepackage{cancel}
\usepackage{enumitem}
\usepackage{subcaption}
\usepackage{wrapfig}
\usepackage{titling}
\usepackage[font=small,labelfont=bf]{caption}
\usepackage{subcaption}
\usepackage{syntax}
\usepackage{listings}
\include{pddl}
\lstset{
    language=PDDL,
    escapeinside={(*}{*)},
}
\DeclareFontShape{OT1}{cmtt}{bx}{n}{<5><6><7><8><9><10><10.95><12><14.4><17.28><20.74><24.88>cmttb10}{}


\usepackage[american]{babel}
\usepackage{csquotes}
% \usepackage[style=apa,backend=biber]{biblatex}
% \DeclareLanguageMapping{american}{american-apa}
% \bibliography{references} % .bib file
% \nocite{*}

\setlength{\droptitle}{-8em}

% \bibliographystyle{apacite}

\DeclareMathOperator*{\argmin}{arg\,min}
\DeclareMathOperator*{\argmax}{arg\,max}
\DeclareMathOperator*{\EV}{E}
\DeclareMathOperator*{\var}{var}
\DeclareMathOperator*{\tr}{tr}
\DeclareMathOperator*{\mathspan}{span}
\newcommand{\MAT}[1]{\begin{bmatrix} #1 \end{bmatrix}}
\newcommand{\sMAT}[1]{\left(\begin{smallmatrix} #1 \end{smallmatrix}\right)}
\DeclareMathOperator*{\vecop}{vec}
\newcommand{\norm}[1]{\left\Vert #1 \right\Vert}
\newcommand{\fnorm}[1]{\left\Vert #1 \right\Vert_F}
\newcommand{\indep}{\perp \!\!\! \perp}
\newcommand{\sol}{{\bf Solution: }}
\newcommand{\soln}{\sol}
\newcommand{\solution}{\sol}
\newcommand{\TODO}{{\bf \color{red} TODO: THIS}}

% \renewcommand{\dateseparator}{--}

\newcommand{\figref}[1]{Figure~\ref{#1}}

\pagestyle{empty} \addtolength{\textwidth}{1.0in}
\addtolength{\textheight}{0.5in} \addtolength{\oddsidemargin}{-0.5in}
\addtolength{\evensidemargin}{-0.5in}
\newcommand{\ruleskip}{\bigskip\hrule\bigskip}
\newcommand{\nodify}[1]{{\sc #1}} \newcommand{\points}[1]{{\textbf{[#1
points]}}}

\newcommand{\bitem}{\begin{list}{$\bullet$}%
{\setlength{\itemsep}{0pt}\setlength{\topsep}{0pt}%
\setlength{\rightmargin}{0pt}}} \newcommand{\eitem}{\end{list}}

\newcommand{\G}{\mathcal{G}}
\newcommand{\E}{\mathbb{E}}
\newcommand{\R}{\mathbb{R}}
\newcommand{\LL}{\mathcal{L}}

%\newcommand{\bE}{\mbox{\boldmath $E$}}
%\newcommand{\be}{\mbox{\boldmath $e$}}
%\newcommand{\bU}{\mbox{\boldmath $U$}}
%\newcommand{\bu}{\mbox{\boldmath $u$}}
%\newcommand{\bQ}{\mbox{\boldmath $Q$}}
%\newcommand{\bq}{\mbox{\boldmath $q$}}
%\newcommand{\bX}{\mbox{\boldmath $X$}}
%\newcommand{\bY}{\mbox{\boldmath $Y$}}
%\newcommand{\bZ}{\mbox{\boldmath $Z$}}
%\newcommand{\bx}{\mbox{\boldmath $x$}}
%\newcommand{\by}{\mbox{\boldmath $y$}}
%\newcommand{\bz}{\mbox{\boldmath $z$}}

\newcommand{\true}{\mbox{true}}
\newcommand{\Parents}{\mbox{Parents}}

\newcommand{\ww}{{\bf w}}
\newcommand{\xx}{{\bf x}}
\newcommand{\yy}{{\bf y}}
\newcommand{\real}{\ensuremath{\mathbb{R}}}

\newcommand{\dsl}[1]{{\it $\langle$#1$\rangle$}}
\newcommand*{\fullref}[1]{\hyperref[{#1}]{\autoref*{#1} (\nameref*{#1})}} % One single link

\newcommand{\eat}[1]{}
\newcommand{\CInd}[3]{({#1} \perp {#2} \mid {#3})}
\newcommand{\Ind}[2]{({#1} \perp {#2})}

\setlength{\parindent}{0pt} \setlength{\parskip}{0.5ex}

% \title{Homework 12} %  \vspace{-3cm} % before name to raise
% \subtitle{MATH-GA 2110}
% \vspace{-5cm} % before name to raise
\title{Goals as Reward-Generating Programs Domain Specific Language}
% \author{Guy Davidson}
% \date{\today}


\begin{document}
\maketitle

{{BODY}}

\section{Modal Definitions in Linear Temporal Logic}
\label{sec:LTL}
\subsection{Linear Temporal Logic definitions}
We offer a mapping between the temporal sequence functions defined in \fullref{sec:constraints} and linear temporal logic (LTL) operators.
As we were creating this DSL, we found that the syntax of the \dsl{then} operator felt more convenient than directly writing down LTL, but we hope the mapping helps reason about how we see our temporal operators functioning.
LTL offers the following operators, using $\varphi$ and $\psi$ as the symbols (in our case, predicates).
Assume the following formulas operate sequence of states $S_0, S_1, \cdots, S_n$:
\begin{itemize}
    \item \textbf{Next}, $X \psi$: at the next timestep, $\psi$ will be true. If we are at timestep $i$, then $S_{i+1} \vdash \psi$

    \item \textbf{Finally}, $F \psi$: at some future timestep, $\psi$ will be true. If we are at timestep $i$, then $\exists j > i:  S_{j} \vdash \psi$

    \item \textbf{Globally}, $G \psi$: from this timestep on, $\psi$ will be true. If we are at timestep $i$, then $\forall j: j \geq i: S_{j} \vdash \psi$

    \item \textbf{Until}, $\psi U \varphi$: $\psi$ will be true from the current timestep until a timestep at which $\varphi$ is true. If we are at timestep $i$, then $\exists j > i: \forall k: i \leq k < j: S_k \vdash \psi$, and $S_j \vdash \varphi$.
    \item \textbf{Strong release}, $\psi M \varphi$: the same as until, but demanding that both $\psi$ and $\varphi$ are true simultaneously: If we are at timestep $i$, then $\exists j > i: \forall k: i \leq k \leq j: S_k \vdash \psi$, and $S_j \vdash \varphi$.

    \textit{Aside:} there's also a \textbf{weak until}, $\psi W \varphi$, which allows for the case where the second is never true, in which case the first must hold for the rest of the sequence. Formally, if we are at timestep $i$, \textit{if} $\exists j > i: \forall k: i \leq k < j: S_k \vdash \psi$, and $S_j \vdash \varphi$, and otherwise, $\forall k \geq i: S_k \vdash \psi$. Similarly there's \textbf{release}, which is the similar variant of strong release. We're leaving those two as an aside since we don't know we'll need them.

\end{itemize}

\subsection{Satisfying a \dsl{then} operator}
Formally, to satisfy a preference using a \dsl{then} operator, we're looking to find a sub-sequence of $S_0, S_1, \cdots, S_n$ that satisfies the formula we translate to.
We translate a \dsl{then} operator by translating the constituent sequence-functions (\dsl{once}, \dsl{hold}, \dsl{while-hold})\footnote{These are the ones we've used so far in the interactive experiment dataset, even if we previously defined other ones, too.} to LTL.
Since the translation of each individual sequence function leaves the last operand empty, we append a `true' ($\top$) as the final operand, since we don't care what happens in the state after the sequence is complete.

(once $\psi$) := $\psi X \cdots$

(hold $\psi$) := $\psi U \cdots$

(hold-while $\psi$ $\alpha$ $\beta$ $\cdots \nu$) := ($\psi M \alpha) X (\psi M \beta) X \cdots X (\psi M \nu) X \psi U \cdots$ where the last $\psi U \cdots$ allows for additional states satisfying $\psi$ until the next modal is satisfied.

For example, a sequence such as the following, which signifies a throw attempt:
\begin{lstlisting}
(then
    (once (agent_holds ?b))
    (hold (and (not (agent_holds ?b)) (in_motion ?b)))
    (once (not (in_motion ?b)))
)
\end{lstlisting}
Can be translated to LTL using $\psi:=$ (agent\_holds ?b), $\varphi:=$ (in\_motion ?b) as:

$\psi X (\neg \psi \wedge \varphi) U (\neg \varphi) X \top $

Here's another example:
\begin{lstlisting}
(then
    (once (agent_holds ?b))  (* \color{blue} $\alpha$*)
    (hold-while
        (and (not (agent_holds ?b)) (in_motion ?b)) (* \color{blue} $\beta$ *)
        (touch ?b ?r) (* \color{blue} $\gamma$*)
    )
    (once  (and (in ?h ?b) (not (in_motion ?b)))) (* \color{blue} $\delta$*)
)
\end{lstlisting}
If we translate each predicate to the letter appearing in blue at the end of the line, this translates to:

$\alpha X (\beta M \gamma) X \beta U \delta X \top$

% \subsection{Satisfying (at-end ...) operators}
% Thankfully, the other type of temporal specification we find ourselves using as part of preferences is much simpler to translate.
% Satisfying an (at-end ...) operator does not require any temporal logic, since the predicate it operates over is evaluated at the terminal state of gameplay.
% The (always ...) operator is equivalent to the LTL globally operator: (always $\psi$) := $G \psi$, with the added constraint that we begin at the first timestep of gameplay.


% I'll attempt to check slightly more formally at some point, but I don't think we end up with many structures that are more complex than this.
% The predicate end up being rather more complex, but that doesn't matter to the LTL translation.

% \section{Modal Definitions}

% \begin{itemize}
%     \item These definitions attempt to offer precision on how the (then ...) operator works. It receives a series of sequence-functions (once, hold, etc.), each of which is parameterized by one or more predicate conditions.

%     \item For the inner sequence-functions, I used the parentheses notation to mean "evaluated at these timesteps" -- does this notation make sense? Should I also use it for the entire then-expression?

%     \item I've only provided here the for the ones currently used in the interactive experiment.
% \end{itemize}

% $(\text{then}\ \langle SF_1 \rangle \ \langle SF_2 \rangle \cdots \langle SF_n \rangle) := \exists t_0 \leq t_1 < t_2 < \cdots < t_n$ such that $SF_1(t_0, t_1) \land SF_2(t_1, t_2) \land \cdots \land SF_n(t_{n-1}, t_n) = \text{true}$, that is, each seq-func evaluated at these timesteps evaluates to true.

% $(\text{once}\ \langle C \rangle)(t_{i-1}, t_i) := t_i = t_{i-1} + 1, S[t_i] \vdash C$, that is, the condition C holds at the next timestep from the previous assigned timestep.

% $(\text{hold}\ \langle C \rangle)(t_{i-1}, t_i) := \forall t:  t_{i-1} < t \leq t_i, S[t] \vdash C$, that is, the condition holds for all timesteps starting immediately after the previous timestep and until the current timestep.

% $(\text{hold-while}\ \langle C \rangle \ \langle C_a \rangle \cdots \langle C_m \rangle)(t_{i-1}, t_i) := \forall t:  t_{i-1} < t \leq t_i, S[t] \vdash C$ and $\exists t_a, \cdots, t_m: t_{i-1} < t_a < \cdots < t_m < t_i$ such that $S[t_a] \vdash C_a, \cdots, S[t_m] \vdash C_m$, that is, the same as hold happens, and while this condition $C$ holds, there exist non-overlapping states in sequence where each of the additional conditions provided hold for at least a single state.

% $(\text{hold-for}\ \langle n \rangle \ \langle C \rangle)(t_{i-1}, t_i) := t_i \geq t_{i-1} + n, \forall t:  t_{i-1} < t \leq t_i, S[t] \vdash C$, that is, the same as the standard hold but for at least $n$ timesteps.

% $(\text{forall-sequence}\ \langle \text{forall-quantifier(s)} \rangle \ \langle \text{then-expr} \rangle)(t_{i-1}, t_i): \forall o \in \{a, b, \cdots, k\}$ satisfying the object assignments in the forall quantifier, $\exists t_0^o, t_1^o, \cdots, t_m^o$ that satisfy the inner then expression, such that $t_{i-1} < t_0^a < \cdots t_m^a < t_0^b < \cdots t_m^b < \cdots < t_0^k < \cdots t_m^k < t_i$, that is, the series of timesteps satisfying the inner then-expression for each object assignment do not overlap, happen in sequence, and fall between the previous assigned timestep and the current assigned timestep.

% \section{Open Questions}
% \begin{itemize}
%     \item Do we want to define syntax to quantify streaks? Some participants will use language like ``every three successful scores in a row get you a point''. An alternative to defining syntax or sequences would be to define the preference to count three successful attempts in a row, but that might be more awkward?

%     \item How do we want to work with type hierarchy, such as block or ball being the super-types for all blocks or balls -- is it an implicit (either ...) over all of the sub-types? Or do we want to provide the hierarchy in some way to the model, perhaps as part of the enumeration of all valid types in a given environment/scene?

%     \item (I'm sure there are more open questions -- will add later)
% \end{itemize}

\end{document}