% Formal Grammars and BNF.tex
% Build-time switches for this slide deck.
% Uncomment for handout
%\def\HANDOUT{}
% language selection
% (\PYTHON and \CPP are tested with \ifdefined further down to choose which
% code excerpt is typeset)
%\def\PYTHON{}
\def\CPP{}
% With \HANDOUT defined, pgfpages tiles four slides per landscape letter page;
% otherwise the plain beamer presentation is built.
% NOTE(review): the handout branch passes an empty option list, so beamer's
% own `handout' class option is not enabled -- confirm that is intended.
\ifdefined\HANDOUT
\documentclass[]{beamer}
\usepackage{pgfpages}
\pgfpagesuselayout{4 on 1}[letterpaper,landscape,border shrink=5mm]
\else
\documentclass{beamer}
\fi
% Presentation-mode appearance: Warsaw theme with a custom structure color.
\mode<presentation>
{
\usetheme{Warsaw}
% Custom palette: sered is a red (0.78, 0.06, 0.18); richblack is pure black.
\definecolor{sered}{rgb}{0.78, 0.06, 0.18}
\definecolor{richblack}{rgb}{0.0, 0.0, 0.0}
% Beamer's "structure" color gets sered as foreground and richblack as background.
\setbeamercolor{structure}{fg=sered,bg=richblack}
% Left disabled: would show covered overlay items as transparent.
%\setbeamercovered{transparent}
}
\usepackage[english]{babel}
\usepackage[latin1]{inputenc}
\usepackage{times}
\usepackage[T1]{fontenc}
\usepackage{tikz}
\usepackage{graphicx}
\usepackage[export]{adjustbox}
\usepackage{fancyvrb}
\usepackage{amsmath}
\usepackage{amssymb}
\usepackage{esvect}
\makeatletter
% \imagesource{<source>}: typesets a credit line reading "Image Source: <source>".
% The label is set in \scriptsize and the source text in \tiny italics.
\newcommand{\imagesource}[1]{{\centering\hfill\break\hbox{\scriptsize Image Source:\thinspace{\tiny\itshape #1}}\par}}
% \image[<caption>]{<file>}{<source>}
% Centers images/<file>, optionally followed by <caption> on its own line,
% and then a tiny "Image Source: <source>" credit.
% The optional argument defaults to the sentinel \@nil; comparing \tmp with
% the kernel macro \@nnil (whose definition is \@nil) detects that no caption
% was supplied.
% Without a caption the image may be up to 0.55\textheight tall; with one it
% is limited to 0.50\textheight (presumably to leave room for the caption).
% The "max height"/"max width" keys come from adjustbox's [export] option.
\newcommand{\image}[3][\@nil]{%
\def\tmp{#1}%
\begin{center}
\ifx\tmp\@nnil
\includegraphics[max height = 0.55\textheight, max width = \textwidth]{images/#2}
\else
\includegraphics[max height = 0.50\textheight, max width = \textwidth]{images/#2}
\linebreak
#1
\fi
\linebreak
{\tiny Image Source:\thinspace{\tiny #3}}
\end{center}
}
% \image is the last definition that needs @ as a letter, so restore its
% normal category code here instead of leaving \makeatletter active for the
% rest of the document.
\makeatother
% Environment `code': a verbatim listing (fancyvrb BVerbatim) wrapped in an
% adjustbox limited to \textwidth wide and 0.7\textheight tall.
% \VerbatimEnvironment is fancyvrb's hook for wrapping a verbatim environment
% inside a user-defined one.
% The frames in this file that use `code' are all declared [fragile].
\newenvironment{code}{%
\VerbatimEnvironment
\begin{adjustbox}{max width=\textwidth, max height=0.7\textheight}
\begin{BVerbatim}
}{
\end{BVerbatim}
\end{adjustbox}
}
\title{Formal Grammars and BNF}
\author{Robert Lowe}
\institute[Southeast Missouri State University] % (optional, but mostly needed)
{
Department of Computer Science\\
Southeast Missouri State University
}
\date[]{}
\subject{}
\pgfdeclareimage[height=1.0cm]{university-logo}{images/semo-logo}
\logo{\pgfuseimage{university-logo}}
% At the start of each section, insert an "Outline" frame whose table of
% contents is rendered with the currentsection option.
% The <beamer> specification restricts this frame to beamer mode.
\AtBeginSection[]
{
\begin{frame}<beamer>{Outline}
\tableofcontents[currentsection]
\end{frame}
}
\begin{document}
\begin{frame}
\titlepage
\end{frame}
\begin{frame}{Outline}
\tableofcontents
\end{frame}
% Structuring a talk is a difficult task and the following structure
% may not be suitable. Here are some rules that apply for this
% solution:
% - Exactly two or three sections (other than the summary).
% - At *most* three subsections per section.
% - Talk about 30s to 2min per frame. So there should be between about
% 15 and 30 frames, all told.
% - A conference audience is likely to know very little of what you
% are going to talk about. So *simplify*!
% - In a 20min talk, getting the main ideas across is hard
% enough. Leave out details, even if it means being less precise than
% you think necessary.
% - If you omit details that are vital to the proof/implementation,
% just say so once. Everybody will be happy with that.
\section{Languages and Grammar}
\begin{frame}{Why use formalisms?}
\begin{itemize}
\item Precise Specifications
\item Mathematical Validation of Implementations
\item Accurate Documentation of a Language
\end{itemize}
\end{frame}
\begin{frame}{Language Definition}
\[
L = \{\textrm{Set of all strings in language L}\}
\]
\begin{itemize}
\item A language is a set of strings.
\item Most interesting languages are infinite: they contain infinitely many strings.
\item The strings within a language must be identifiable. That is, given a string
$s$, the following must hold:
\[
\forall s,\ s \in L \vee s \notin L
\]
\end{itemize}
\end{frame}
\begin{frame}{Formal Grammar}
\[
L = \{ s \in \Sigma^* : s\ \textrm{is generated by}\ G \}
\]
\[
s \in L \iff s\ \textrm{is generated by}\ G
\]
\begin{itemize}
\item A {\bf formal grammar} $G$ is a set of rules which generates the strings in $L$.
\item $\Sigma$ is the {\bf alphabet} of the language.
\item $\Sigma^*$ is the {\bf Kleene Closure} of the alphabet.
\begin{itemize}
\item Set of all strings of all lengths consisting of the symbols in $\Sigma$
\item Example: $\{a,b\}^* = \{\varepsilon, a, b, aa, ab, ba, bb, aab, aba, \ldots \}$, where $\varepsilon$ is the empty string
\end{itemize}
\end{itemize}
\end{frame}
\begin{frame}[fragile]{Example Formal Grammar}
\begin{columns}
\column{0.3\textwidth}
Rules in $G$
\begin{enumerate}
\item $s \rightarrow e$
\item $e \rightarrow e + e$
\item $e \rightarrow e - e$
\item $e \rightarrow e * e$
\item $e \rightarrow e / e$
\item $e \rightarrow n$
\item $n \rightarrow nd $
\item $n \rightarrow d $
\item $d \rightarrow 0 $
\item $d \rightarrow 1 $
\end{enumerate}
\column{0.7\textwidth}
Some of the strings generated by $G$
\begin{verbatim}
0
01
10
11
100
101
11111111110000011111
1+1
10+11
101+111*11
1+1-1+1-1+1
\end{verbatim}
\end{columns}
\end{frame}
\begin{frame}{Components of a Grammar}
\[
G = \langle \Sigma, N, S, P\rangle
\]
\begin{itemize}
\item $\Sigma$ --- Set of {\bf Terminal Symbols}
\item $N$ --- Set of {\bf Non-Terminal Symbols}
\item $S$ --- The {\bf Start Symbol}
\item $P$ --- Set of {\bf Production Rules}
\end{itemize}
\end{frame}
\begin{frame}{Full Formal Example}
\[
G = \langle \Sigma, N, S, P \rangle
\]
\begin{itemize}
\item $\Sigma=\{+,-,*,/,0,1\}$
\item $N=\{s,e,n,d\}$
\item $S=s$
\item $P=\{ s \rightarrow e, e \rightarrow e + e, e \rightarrow e - e, e \rightarrow e * e,%
e \rightarrow e / e,e \rightarrow n,n \rightarrow nd,n \rightarrow d,%
d \rightarrow 0, d \rightarrow 1\}$
\end{itemize}
\end{frame}
\begin{frame}[fragile]{Generating Strings from a Grammar}
General algorithm framework:
\vspace{0.25in}
\begin{code}
1.) s = S
2.) while s contains a non-terminal:
3.)     select a sub-string and a matching replacement rule
4.)     replace the sub-string with the rule's right-hand side
\end{code}
\vspace{0.25in}
\end{frame}
\begin{frame}[fragile]{Recursive Random Generation}
\begin{columns}
\column{0.5\textwidth}
\ifdefined\PYTHON
\begin{block}{Python Excerpt}
\begin{code}
def e():
    # Expand non-terminal e with a randomly chosen rule.
    # Numbers match grammar rules 2-6.
    rule = randint(2, 6)
    if rule == 2:
        return e() + '+' + e()
    elif rule == 3:
        return e() + '-' + e()
    elif rule == 4:
        return e() + '*' + e()
    elif rule == 5:
        return e() + '/' + e()
    else:
        # rule 6
        return n()
\end{code}
\end{block}
\fi
\ifdefined\CPP
\begin{block}{C++ Excerpt}
\begin{code}
// Expand non-terminal e with a randomly selected rule (2-6).
std::string e()
{
    int rule = select_rule(2,6);
    if(rule == 2) {
        return e() + "+" + e();
    } else if(rule == 3) {
        return e() + "-" + e();
    } else if(rule == 4) {
        return e() + "*" + e();
    } else if(rule == 5) {
        return e() + "/" + e();
    }
    //rule 6
    return n();
}
\end{code}
\end{block}
\fi
\column{0.5\textwidth}
\begin{itemize}
\item Each non-terminal becomes a function.
\item Randomly select the rule to expand.
\item Call the appropriate non-terminals and concatenate with the terminals.
\item NOTE: Usually, we must limit the recursion depth; otherwise, the strings become too long!
\end{itemize}
\end{columns}
\end{frame}
\section{Types of Languages}
\begin{frame}{Classifications of Languages and Recognizers}
\begin{block}{Language Recognition}
A compiler and/or interpreter is a program that essentially recognizes a
string and then processes it.
\end{block}
\begin{itemize}
\item Languages are classified by the types of rules in their grammar.
\item This corresponds to the complexity of recognizing the languages.
\end{itemize}
\end{frame}
\begin{frame}{The Chomsky Hierarchy}
\begin{columns}
\column{0.6\textwidth}
\begin{itemize}
\item Type-0 {\bf Recursively Enumerable}
\[
\gamma \rightarrow \alpha
\]
\item Type-1 {\bf Context Sensitive}
\[
\alpha A \beta \rightarrow \alpha \gamma \beta
\]
\item Type-2 {\bf Context-Free}
\[
A \rightarrow \alpha
\]
\item Type-3 {\bf Regular}
\[
A \rightarrow a \textrm{ and }
A \rightarrow aB
\]
\end{itemize}
\column{0.4\textwidth}
\begin{block}{Definitions}
\begin{itemize}
\item $a$ --- terminal
\item $A,B$ --- non-terminal
\item $\alpha, \beta, \gamma$ --- string of terminals and/or non-terminals
\begin{itemize}
\item $\alpha,\beta$ may be empty
\item $\gamma$ never empty
\end{itemize}
\end{itemize}
\end{block}
\end{columns}
\end{frame}
\begin{frame}{Containment Hierarchy}
\image[Chomsky-Hierarchy]{chomsky-hierarchy}{Wikipedia}
\end{frame}
\begin{frame}{Language Recognition Requirements}
\begin{itemize}
\item Type-0 Recursively Enumerable --- Turing Machine
\item Type-1 Context Sensitive --- Linear Bounded Turing Machine
\item Type-2 Context-Free --- Non-Deterministic Push-Down Automaton
\item Type-3 Regular --- Finite State Automaton
\end{itemize}
\end{frame}
\section{BNF Notation}
\begin{frame}{Programming Languages and Formal Grammars}
Most programming languages are:
\begin{itemize}
\item Predominantly context-free syntax.
\item Some context-sensitive elements (though usually not expressed in the grammar).
\end{itemize}
\begin{block}{Syntax}
The {\bf syntax} of a programming language is its grammar. It determines if a string
is a well-formed program.
\end{block}
\end{frame}
\begin{frame}{BNF Notation}
\begin{block}{BNF}
Backus-Naur Form is a convenient plain-text formal representation of context-free grammars.
\end{block}
\begin{itemize}
\item Non-terminals are denoted in angle brackets: \texttt{< Name >}
\item Terminals are denoted in quotes: \texttt{"0"}
\item Arrows are rendered as: \texttt{::=}
\item Multiple rules are joined using the or symbol: \texttt{|}
\end{itemize}
\end{frame}
\begin{frame}[fragile]{BNF Example}
\begin{code}
< Start >      ::= < Expression >
< Expression > ::= < Expression > "+" < Expression >
                 | < Expression > "-" < Expression >
                 | < Expression > "*" < Expression >
                 | < Expression > "/" < Expression >
                 | < Number >
< Number >     ::= < Number > < Digit >
                 | < Digit >
< Digit >      ::= "0" | "1" | "2" | "3" | ... | "9"
\end{code}
\end{frame}
%\begin{frame}{Reading Assignment}
% \begin{itemize}
% \item \textit{Three Models for the Description of Language} by Noam Chomsky (chomsky-1956.pdf)
% \item \textit{On Certain Formal Properties of Grammars} by Noam Chomsky (chomsky-1959.pdf)
% \item Chapter 3 of your Textbook
% \end{itemize}
%\end{frame}
\end{document}