% Formal Grammars and BNF.tex

% Uncomment for handout
%\def\HANDOUT{}

% language selection
%\def\PYTHON{}
\def\CPP{}

% Select the document class. When \HANDOUT is defined, pgfpages is loaded and
% the "4 on 1" layout places four slides on each landscape letter-size page;
% otherwise the plain beamer class is used.
% NOTE(review): the handout branch passes an empty option list to beamer
% rather than the `handout` class option -- confirm this is intentional.
\ifdefined\HANDOUT
\documentclass[]{beamer}
\usepackage{pgfpages}
\pgfpagesuselayout{4 on 1}[letterpaper,landscape,border shrink=5mm]
\else
\documentclass{beamer}
\fi

% Appearance settings applied in presentation mode: the Warsaw theme, with the
% beamer `structure` color set to the custom red `sered` as foreground and
% `richblack` (rgb 0,0,0) as background.
\mode<presentation>
{
  \usetheme{Warsaw}
  \definecolor{sered}{rgb}{0.78, 0.06, 0.18}
  \definecolor{richblack}{rgb}{0.0, 0.0, 0.0}
  \setbeamercolor{structure}{fg=sered,bg=richblack}
  %\setbeamercovered{transparent}
}


\usepackage[english]{babel}
\usepackage[latin1]{inputenc}
\usepackage{times}
\usepackage[T1]{fontenc}
\usepackage{tikz}
\usepackage{graphicx}
\usepackage[export]{adjustbox}
\usepackage{fancyvrb}
\usepackage{amsmath}
\usepackage{amssymb}
\usepackage{esvect}

\makeatletter
% \imagesource{<source>}
% Typesets a standalone "Image Source: <source>" credit: the label in
% \scriptsize, the source text itself in tiny italics, inside a centered group
% that is closed with an explicit paragraph break.
\newcommand{\imagesource}[1]{%
  {%
    \centering
    \hfill\break
    \hbox{\scriptsize Image Source:\thinspace{\tiny\itshape #1}}%
    \par
  }%
}
% \image[<caption>]{<file>}{<source>}
% Centers the graphic images/<file> on the slide and prints a tiny
% "Image Source: <source>" credit beneath it.
%   #1 (optional) caption placed on its own line under the graphic; when it
%      is given, the graphic's height cap drops from 0.55 to 0.50 of
%      \textheight.
%   #2 file name, looked up inside the images/ directory.
%   #3 attribution text for the credit line.
% The optional argument defaults to the sentinel \@nil. \tmp then has the same
% definition as the kernel macro \@nnil (whose replacement text is \@nil), so
% the \ifx test below is true exactly when no caption was supplied.
\newcommand{\image}[3][\@nil]{%
        \def\tmp{#1}%
        \begin{center}
        \ifx\tmp\@nnil
            % No caption supplied: the graphic may use the larger height cap.
            \includegraphics[max height = 0.55\textheight, max width = \textwidth]{images/#2}
        \else
            % Caption supplied: smaller height cap, caption on the next line.
            \includegraphics[max height = 0.50\textheight, max width = \textwidth]{images/#2}
            \linebreak
            #1
        \fi
        \linebreak
        {\tiny Image Source:\thinspace{\tiny #3}}
        \end{center}
}
% \image is the last definition that refers to @-names, so restore the normal
% category code of @ here; the earlier \makeatletter was otherwise left open
% for the rest of the file.
\makeatother

% code environment: verbatim listing for slides.
% The body is set with fancyvrb's BVerbatim (a boxed verbatim) and wrapped in
% an adjustbox that caps it at \textwidth wide and 0.7\textheight tall.
% \VerbatimEnvironment is the fancyvrb hook that lets a verbatim environment
% be opened inside a \newenvironment definition.
% Every frame in this file that uses the environment is declared [fragile].
% The line breaks inside the definition are left exactly as they were, since
% verbatim scanning is sensitive to them.
\newenvironment{code}{%
 \VerbatimEnvironment
 \begin{adjustbox}{max width=\textwidth, max height=0.7\textheight}
 \begin{BVerbatim}
  }{
  \end{BVerbatim}
 \end{adjustbox}
}

\title{Formal Grammars and BNF}


\author{Robert Lowe}

\institute[Southeast Missouri State University] % (optional, but mostly needed)
{
  Department of Computer Science\\
  Southeast Missouri State University
}

\date[]{}
\subject{}

\pgfdeclareimage[height=1.0cm]{university-logo}{images/semo-logo}
\logo{\pgfuseimage{university-logo}}


% At the start of every section, insert an "Outline" frame showing the table
% of contents with the current section highlighted. The <beamer> mode
% specification restricts this frame to beamer mode. The empty optional
% argument is the variant used for starred \section* commands, so those insert
% nothing.
\AtBeginSection[]
{
  \begin{frame}<beamer>{Outline}
    \tableofcontents[currentsection]
  \end{frame}
}


\begin{document}

\begin{frame}
  \titlepage
\end{frame}

\begin{frame}{Outline}
  \tableofcontents
\end{frame}


% Structuring a talk is a difficult task and the following structure
% may not be suitable. Here are some rules that apply for this
% solution: 

% - Exactly two or three sections (other than the summary).
% - At *most* three subsections per section.
% - Talk about 30s to 2min per frame. So there should be between about
%   15 and 30 frames, all told.

% - A conference audience is likely to know very little of what you
%   are going to talk about. So *simplify*!
% - In a 20min talk, getting the main ideas across is hard
%   enough. Leave out details, even if it means being less precise than
%   you think necessary.
% - If you omit details that are vital to the proof/implementation,
%   just say so once. Everybody will be happy with that.

\section{Languages and Grammar}

\begin{frame}{Why use formalisms?}
    \begin{itemize}
        \item Precise Specifications
        \item Mathematical Validation of Implementations
        \item Accurate Documentation of a Language
    \end{itemize}
\end{frame}

\begin{frame}{Language Definition}
\[
L = \{\textrm{Set of all strings in language L}\}
\]

    \begin{itemize}
        \item A language is a set of strings.
        \item Most interesting languages are infinite: they contain infinitely many strings.
        \item The strings within a language must be identifiable. That is, given a string
          $s$, the following must hold:
          \[
          \forall s,\  s \in L \vee s \notin L
          \]
    \end{itemize}
\end{frame}

\begin{frame}{Formal Grammar}
    \[
    L = \{ s \in \Sigma^* : s\ \textrm{is generated by } G \}
    \]
    \[
        s \in L \iff s\ \textrm{is generated by } G
    \]
    \begin{itemize}
        \item A {\bf formal grammar} $G$ is a set of rules which generates the strings in $L$.
        \item $\Sigma$ is the {\bf alphabet} of the language.
        \item $\Sigma^*$ is the {\bf Kleene Closure} of the alphabet.
        \begin{itemize}
            \item Set of all strings of all lengths consisting of the symbols in $\Sigma$
            \item Example $\{a,b\}^* = \{\varepsilon, a, b, aa, ab, ba, bb, aab, aba, \ldots \}$, where $\varepsilon$ denotes the empty string
        \end{itemize}
    \end{itemize}
\end{frame}

\begin{frame}[fragile]{Example Formal Grammar}
    \begin{columns}
    \column{0.3\textwidth}
    Rules in $G$
    \begin{enumerate}
        \item $s \rightarrow e$
        \item $e \rightarrow e + e$
        \item $e \rightarrow e - e$
        \item $e \rightarrow e * e$
        \item $e \rightarrow e / e$
        \item $e \rightarrow n$
        \item $n \rightarrow nd $
        \item $n \rightarrow d $
        \item $d \rightarrow 0 $
        \item $d \rightarrow 1 $
    \end{enumerate}
    \column{0.7\textwidth}
    Some of the strings generated by $G$
    \begin{verbatim}
    0
    01
    10
    11
    100
    101
    11111111110000011111
    1+1
    10+11
    101+111*11
    1+1-1+1-1+1
    \end{verbatim}
    \end{columns}
\end{frame}

\begin{frame}{Components of a Grammar}
    \[
    G = \langle \Sigma, N, S, P\rangle
    \]
    \begin{itemize}
        \item $\Sigma$ --- Set of {\bf Terminal Symbols}
        \item $N$ --- Set of {\bf Non-Terminal Symbols}
        \item $S$ --- The {\bf Start Symbol} 
        \item $P$ --- Set of {\bf Production Rules}
    \end{itemize}
\end{frame}

\begin{frame}{Full Formal Example}
    \[
    G = \langle \Sigma, N, S, P \rangle
    \]
    \begin{itemize}
        \item $\Sigma=\{+,-,*,/,0,1\}$
        \item $N=\{s,e,n,d\}$
        \item $S=s$
        \item $P=\{ s \rightarrow e, e \rightarrow e + e, e \rightarrow e - e, e \rightarrow e * e,%
                    e \rightarrow e / e,e \rightarrow n,n \rightarrow nd,n \rightarrow d,%
                    d \rightarrow 0, d \rightarrow 1\}$
    \end{itemize}
\end{frame}

\begin{frame}[fragile]{Generating Strings from a Grammar}
General algorithm framework:
\vspace{0.25in}

\begin{code}
1.)   s = S
2.)   while s contains a non-terminal:
3.)      select a sub-string and matching replacement rule
4.)      replace the sub-string with replacement rule
\end{code}

\vspace{0.25in}
\end{frame}

\begin{frame}[fragile]{Recursive Random Generation}
\begin{columns}
\column{0.5\textwidth}


\ifdefined\PYTHON
\begin{block}{Python Excerpt}
\begin{code}
def e():
    i = randint(1, 5)
    if i == 1:
        return e() + '+' + e()
    elif i == 2:
        return e() + '-' + e()
    elif i == 3:
        return e() + '*' + e()
    elif i == 4:
        return e() + '/' + e()
    else:
        return n()
\end{code}
\end{block}
\fi

\ifdefined\CPP
\begin{block}{C++ Excerpt}
\begin{code}
std::string e()
{
    int rule = select_rule(2,6);

    if(rule == 2) {
        return e() + "+" + e();
    } else if(rule == 3) {
        return e() + "-" + e();
    } else if(rule == 4) {
        return e() + "*" + e();
    } else if(rule == 5) {
        return e() + "/" + e();
    }

    //rule 6
    return n();
}
\end{code}
\end{block}
\fi

\column{0.5\textwidth}
    \begin{itemize}
        \item Each non-terminal becomes a function.
        \item Randomly select the rule to expand.
        \item Call the appropriate non-terminals and concatenate with the terminals.
        \item NOTE: Usually, we must limit the depth; otherwise, the strings become too long!
    \end{itemize}
    \end{columns}
\end{frame}

\section{Types of Languages}
\begin{frame}{Classifications of Languages and Recognizers}
    \begin{block}{Language Recognition}
        A compiler and/or interpreter is a program that essentially recognizes a 
        string and then processes it.
    \end{block}
    \begin{itemize}
        \item Languages are classified by the types of rules in their grammar.
        \item This corresponds to the complexity of recognizing the languages.
    \end{itemize}
\end{frame}

\begin{frame}{The Chomsky Hierarchy}
\begin{columns}
\column{0.6\textwidth}
\begin{itemize}
    \item Type-0 {\bf Recursively Enumerable}
    \[
        \gamma \rightarrow \alpha
    \]
    \item Type-1 {\bf Context Sensitive}
    \[
    \alpha A \beta \rightarrow \alpha \gamma \beta
    \]
    \item Type-2 {\bf Context-Free}
    \[
    A \rightarrow \alpha
    \]
    \item Type-3 {\bf Regular}
    \[
    A \rightarrow a \textrm{ and }
    A \rightarrow aB
    \]
\end{itemize}
\column{0.4\textwidth}
    \begin{block}{Definitions}
        \begin{itemize}
            \item $a$ --- terminal
            \item $A,B$ --- non-terminal
            \item $\alpha, \beta, \gamma$ --- string of terminals and/or non-terminals
            \begin{itemize}
                \item $\alpha,\beta$ may be empty
                \item $\gamma$ never empty
            \end{itemize}
        \end{itemize}
    \end{block}

\end{columns}
\end{frame}

\begin{frame}{Containment Hierarchy}
\image[Chomsky-Hierarchy]{chomsky-hierarchy}{Wikipedia}
\end{frame}

\begin{frame}{Language Recognition Requirements}
    \begin{itemize}
        \item Type-0 Recursively Enumerable --- Turing Machine
        \item Type-1 Context Sensitive --- Linear Bounded Turing Machine
        \item Type-2 Context-Free --- Non-Deterministic Push-Down Automaton
        \item Type-3 Regular --- Finite State Automaton
    \end{itemize}
\end{frame}


\section{BNF Notation}

\begin{frame}{Programming Languages and Formal Grammars}
    Most programming languages have:
    \begin{itemize}
        \item Predominantly context-free syntax.
        \item Some context-sensitive elements (though these are usually not expressed in the grammar).
    \end{itemize}
    
    \begin{block}{Syntax}
        The {\bf syntax} of a programming language is its grammar. It determines if a string
        is a well-formed program.
    \end{block}
\end{frame}

\begin{frame}{BNF Notation}
    \begin{block}{BNF}
        Backus-Naur Form is a convenient plain-text formal representation of context-free grammars.
    \end{block}
    \begin{itemize}
        \item Non-terminals are denoted in angle brackets: \texttt{< Name >}
        \item Terminals are denoted in quotes: \texttt{"0"}
        \item Arrows are rendered as: \texttt{::=}
        \item Multiple rules are joined using the or symbol: \texttt{|}
    \end{itemize}
\end{frame}

\begin{frame}[fragile]{BNF Example}
\begin{code}
< Start >      ::= < Expression >

< Expression > ::= < Expression > "+" < Expression > 
                   | < Expression > "-" < Expression >
                   | < Expression > "*" < Expression >
                   | < Expression > "/" < Expression >
                   | < Number >

< Number >     ::= < Number > < Digit > 
                   | < Digit >

< Digit >      ::= "0" | "1" | "2" | "3" | ... | "9"
\end{code}
\end{frame}


%\begin{frame}{Reading Assignment}
%    \begin{itemize}
%        \item \textit{Three Models for the Description of Language} by Noam Chomsky (chomsky-1956.pdf)
%        \item \textit{On Certain Formal Properties of Grammars} by Noam Chomsky (chomsky-1959.pdf)
%        \item Chapter 3 of your Textbook
%    \end{itemize}
%\end{frame}


\end{document}