Skip to content

Commit a809598

Browse files
committed
Update
1 parent ec7e4fd commit a809598

15 files changed

+36
-19
lines changed

L00-Linux-Basics/L00.1-Linux-Basics.slides.html

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,7 @@
5050
.highlight .m { color: var(--jp-mirror-editor-number-color) } /* Literal.Number */
5151
.highlight .s { color: var(--jp-mirror-editor-string-color) } /* Literal.String */
5252
.highlight .ow { color: var(--jp-mirror-editor-operator-color); font-weight: bold } /* Operator.Word */
53+
.highlight .pm { color: var(--jp-mirror-editor-punctuation-color) } /* Punctuation.Marker */
5354
.highlight .w { color: var(--jp-mirror-editor-variable-color) } /* Text.Whitespace */
5455
.highlight .mb { color: var(--jp-mirror-editor-number-color) } /* Literal.Number.Bin */
5556
.highlight .mf { color: var(--jp-mirror-editor-number-color) } /* Literal.Number.Float */

L01-Introduction-to-Distributed-Computing/L01.1-Introduction-to-Distributed-Computing.slides.html

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,7 @@
5050
.highlight .m { color: var(--jp-mirror-editor-number-color) } /* Literal.Number */
5151
.highlight .s { color: var(--jp-mirror-editor-string-color) } /* Literal.String */
5252
.highlight .ow { color: var(--jp-mirror-editor-operator-color); font-weight: bold } /* Operator.Word */
53+
.highlight .pm { color: var(--jp-mirror-editor-punctuation-color) } /* Punctuation.Marker */
5354
.highlight .w { color: var(--jp-mirror-editor-variable-color) } /* Text.Whitespace */
5455
.highlight .mb { color: var(--jp-mirror-editor-number-color) } /* Literal.Number.Bin */
5556
.highlight .mf { color: var(--jp-mirror-editor-number-color) } /* Literal.Number.Float */

L01-Introduction-to-Distributed-Computing/L01.2-Introduction-to-Hadoop.slides.html

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,7 @@
5050
.highlight .m { color: var(--jp-mirror-editor-number-color) } /* Literal.Number */
5151
.highlight .s { color: var(--jp-mirror-editor-string-color) } /* Literal.String */
5252
.highlight .ow { color: var(--jp-mirror-editor-operator-color); font-weight: bold } /* Operator.Word */
53+
.highlight .pm { color: var(--jp-mirror-editor-punctuation-color) } /* Punctuation.Marker */
5354
.highlight .w { color: var(--jp-mirror-editor-variable-color) } /* Text.Whitespace */
5455
.highlight .mb { color: var(--jp-mirror-editor-number-color) } /* Literal.Number.Bin */
5556
.highlight .mf { color: var(--jp-mirror-editor-number-color) } /* Literal.Number.Float */

L03-Statistical-Modeling-with-MapReduce/L03.1-Statistics-with-Hadoop.slides.html

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,7 @@
5050
.highlight .m { color: var(--jp-mirror-editor-number-color) } /* Literal.Number */
5151
.highlight .s { color: var(--jp-mirror-editor-string-color) } /* Literal.String */
5252
.highlight .ow { color: var(--jp-mirror-editor-operator-color); font-weight: bold } /* Operator.Word */
53+
.highlight .pm { color: var(--jp-mirror-editor-punctuation-color) } /* Punctuation.Marker */
5354
.highlight .w { color: var(--jp-mirror-editor-variable-color) } /* Text.Whitespace */
5455
.highlight .mb { color: var(--jp-mirror-editor-number-color) } /* Literal.Number.Bin */
5556
.highlight .mf { color: var(--jp-mirror-editor-number-color) } /* Literal.Number.Float */

L03-Statistical-Modeling-with-MapReduce/L03.2-Statistical-Modeling-with-MapReduce.slides.html

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,7 @@
5050
.highlight .m { color: var(--jp-mirror-editor-number-color) } /* Literal.Number */
5151
.highlight .s { color: var(--jp-mirror-editor-string-color) } /* Literal.String */
5252
.highlight .ow { color: var(--jp-mirror-editor-operator-color); font-weight: bold } /* Operator.Word */
53+
.highlight .pm { color: var(--jp-mirror-editor-punctuation-color) } /* Punctuation.Marker */
5354
.highlight .w { color: var(--jp-mirror-editor-variable-color) } /* Text.Whitespace */
5455
.highlight .mb { color: var(--jp-mirror-editor-number-color) } /* Literal.Number.Bin */
5556
.highlight .mf { color: var(--jp-mirror-editor-number-color) } /* Literal.Number.Float */

L04-Hive/L04-Hive.slides.html

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,7 @@
5050
.highlight .m { color: var(--jp-mirror-editor-number-color) } /* Literal.Number */
5151
.highlight .s { color: var(--jp-mirror-editor-string-color) } /* Literal.String */
5252
.highlight .ow { color: var(--jp-mirror-editor-operator-color); font-weight: bold } /* Operator.Word */
53+
.highlight .pm { color: var(--jp-mirror-editor-punctuation-color) } /* Punctuation.Marker */
5354
.highlight .w { color: var(--jp-mirror-editor-variable-color) } /* Text.Whitespace */
5455
.highlight .mb { color: var(--jp-mirror-editor-number-color) } /* Literal.Number.Bin */
5556
.highlight .mf { color: var(--jp-mirror-editor-number-color) } /* Literal.Number.Float */

L05-Introduction-to-Spark/L05.1-Introduction-to-Spark.slides.html

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,7 @@
5050
.highlight .m { color: var(--jp-mirror-editor-number-color) } /* Literal.Number */
5151
.highlight .s { color: var(--jp-mirror-editor-string-color) } /* Literal.String */
5252
.highlight .ow { color: var(--jp-mirror-editor-operator-color); font-weight: bold } /* Operator.Word */
53+
.highlight .pm { color: var(--jp-mirror-editor-punctuation-color) } /* Punctuation.Marker */
5354
.highlight .w { color: var(--jp-mirror-editor-variable-color) } /* Text.Whitespace */
5455
.highlight .mb { color: var(--jp-mirror-editor-number-color) } /* Literal.Number.Bin */
5556
.highlight .mf { color: var(--jp-mirror-editor-number-color) } /* Literal.Number.Float */
@@ -14136,7 +14137,7 @@ <h2 id="Why-Spark">Why Spark<a class="anchor-link" href="#Why-Spark">&#182;</a><
1413614137
</div>
1413714138
<div class="jp-InputArea jp-Cell-inputArea"><div class="jp-InputPrompt jp-InputArea-prompt">
1413814139
</div><div class="jp-RenderedHTMLCommon jp-RenderedMarkdown jp-MarkdownOutput " data-mime-type="text/markdown">
14139-
<h1 id="Who-creates-Spark">Who creates Spark<a class="anchor-link" href="#Who-creates-Spark">&#182;</a></h1><ul>
14140+
<h1 id="Who-created-Spark">Who created Spark<a class="anchor-link" href="#Who-created-Spark">&#182;</a></h1><ul>
1414014141
<li><p>Spark was a PhD student project at UC Berkeley.</p>
1414114142
</li>
1414214143
<li><p><a href="https://cs.stanford.edu/people/matei/">Matei Zaharia</a> was the major contributor during his PhD at UC Berkeley in 2009.</p>
@@ -14161,6 +14162,9 @@ <h3 id="Ease-of-Use">Ease of Use<a class="anchor-link" href="#Ease-of-Use">&#182
1416114162
<li><p>Spark offers over 80 high-level operators that make it easy to build parallel apps. And you can use it interactively from the Scala, Python, R, and SQL shells.</p>
1416214163
</li>
1416314164
</ul>
14165+
<ul>
14166+
<li>DataFrame with pandas API support</li>
14167+
</ul>
1416414168

1416514169
</div>
1416614170
</div>
@@ -14246,7 +14250,7 @@ <h2 id="Launching-Applications-with-spark-submit">Launching Applications with <c
1424614250
--deploy-mode &lt;deploy-mode&gt; \
1424714251
--conf &lt;key&gt;=&lt;value&gt; \
1424814252
... # other options
14249-
&lt;application-jar&gt; \
14253+
&lt;application&gt; \
1425014254
[application-arguments]</code></pre>
1425114255

1425214256
</div>
@@ -14279,7 +14283,7 @@ <h2 id="Run-on-a-YARN-cluster">Run on a YARN cluster<a class="anchor-link" href=
1427914283
</div>
1428014284
<div class="jp-InputArea jp-Cell-inputArea"><div class="jp-InputPrompt jp-InputArea-prompt">
1428114285
</div><div class="jp-RenderedHTMLCommon jp-RenderedMarkdown jp-MarkdownOutput " data-mime-type="text/markdown">
14282-
<h2 id="Run-a-Python-application-on-a-Spark-standalone-cluster">Run a Python application on a Spark standalone cluster<a class="anchor-link" href="#Run-a-Python-application-on-a-Spark-standalone-cluster">&#182;</a></h2>
14286+
<h2 id="Run-a-Python-application-on-a-Spark-on-YARN-cluster">Run a Python application on a Spark-on-YARN cluster<a class="anchor-link" href="#Run-a-Python-application-on-a-Spark-on-YARN-cluster">&#182;</a></h2>
1428314287
<pre><code>PYSPARK_PYTHON=python3.7 spark-submit \
1428414288
--master yarn \
1428514289
examples/src/main/python/pi.py \
@@ -14358,7 +14362,7 @@ <h2 id="Run-spark-interactively-within-Python">Run spark interactively within Py
1435814362
<div class="jp-CodeMirrorEditor jp-Editor jp-InputArea-editor" data-type="inline">
1435914363
<div class="CodeMirror cm-s-jupyter">
1436014364
<div class=" highlight hl-ipython3"><pre><span></span><span class="kn">import</span> <span class="nn">findspark</span>
14361-
<span class="n">findspark</span><span class="o">.</span><span class="n">init</span><span class="p">(</span><span class="s1">'/usr/lib/spark-current/'</span><span class="p">)</span>
14365+
<span class="n">findspark</span><span class="o">.</span><span class="n">init</span><span class="p">()</span>
1436214366
</pre></div>
1436314367

1436414368
</div>

L05-Introduction-to-Spark/L05.2-Datasets-and-Parallelization.slides.html

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,7 @@
5050
.highlight .m { color: var(--jp-mirror-editor-number-color) } /* Literal.Number */
5151
.highlight .s { color: var(--jp-mirror-editor-string-color) } /* Literal.String */
5252
.highlight .ow { color: var(--jp-mirror-editor-operator-color); font-weight: bold } /* Operator.Word */
53+
.highlight .pm { color: var(--jp-mirror-editor-punctuation-color) } /* Punctuation.Marker */
5354
.highlight .w { color: var(--jp-mirror-editor-variable-color) } /* Text.Whitespace */
5455
.highlight .mb { color: var(--jp-mirror-editor-number-color) } /* Literal.Number.Bin */
5556
.highlight .mf { color: var(--jp-mirror-editor-number-color) } /* Literal.Number.Float */
@@ -14237,7 +14238,7 @@ <h2 id="Create-a-SparkContext-object">Create a SparkContext object<a class="anch
1423714238
<div class="jp-CodeMirrorEditor jp-Editor jp-InputArea-editor" data-type="inline">
1423814239
<div class="CodeMirror cm-s-jupyter">
1423914240
<div class=" highlight hl-ipython3"><pre><span></span><span class="kn">import</span> <span class="nn">findspark</span>
14240-
<span class="n">findspark</span><span class="o">.</span><span class="n">init</span><span class="p">(</span><span class="s2">"/usr/lib/spark-current"</span><span class="p">)</span>
14241+
<span class="n">findspark</span><span class="o">.</span><span class="n">init</span><span class="p">()</span>
1424114242
<span class="kn">import</span> <span class="nn">pyspark</span>
1424214243
</pre></div>
1424314244

@@ -14254,7 +14255,7 @@ <h2 id="Create-a-SparkContext-object">Create a SparkContext object<a class="anch
1425414255
<div class="jp-InputPrompt jp-InputArea-prompt">In&nbsp;[&nbsp;]:</div>
1425514256
<div class="jp-CodeMirrorEditor jp-Editor jp-InputArea-editor" data-type="inline">
1425614257
<div class="CodeMirror cm-s-jupyter">
14257-
<div class=" highlight hl-ipython3"><pre><span></span><span class="n">conf</span> <span class="o">=</span> <span class="n">pyspark</span><span class="o">.</span><span class="n">SparkConf</span><span class="p">()</span><span class="o">.</span><span class="n">setAppName</span><span class="p">(</span><span class="s2">"My First Spark RDD APP"</span><span class="p">)</span><span class="o">.</span><span class="n">setMaster</span><span class="p">(</span><span class="s2">"local"</span><span class="p">)</span> <span class="c1"># “yarn”</span>
14258+
<div class=" highlight hl-ipython3"><pre><span></span><span class="n">conf</span> <span class="o">=</span> <span class="n">pyspark</span><span class="o">.</span><span class="n">SparkConf</span><span class="p">()</span><span class="o">.</span><span class="n">setAppName</span><span class="p">(</span><span class="s2">"My First Spark RDD APP"</span><span class="p">)</span> <span class="c1">#.setMaster("local") # “yarn”</span>
1425814259
<span class="n">sc</span> <span class="o">=</span> <span class="n">pyspark</span><span class="o">.</span><span class="n">SparkContext</span><span class="p">(</span><span class="n">conf</span><span class="o">=</span><span class="n">conf</span><span class="p">)</span>
1425914260
</pre></div>
1426014261

@@ -15317,7 +15318,7 @@ <h3 id="Distributed-matrix">Distributed matrix<a class="anchor-link" href="#Dist
1531715318
<div class="jp-InputArea jp-Cell-inputArea"><div class="jp-InputPrompt jp-InputArea-prompt">
1531815319
</div><div class="jp-RenderedHTMLCommon jp-RenderedMarkdown jp-MarkdownOutput " data-mime-type="text/markdown">
1531915320
<h2 id="Lab">Lab<a class="anchor-link" href="#Lab">&#182;</a></h2><ul>
15320-
<li><p>Run a simple Spark application with the yarn mode。</p>
15321+
<li><p>Run a simple Spark application.</p>
1532115322
</li>
1532215323
<li><p>Understand the <a href="https://spark.apache.org/docs/latest/mllib-data-types.html">basic data structure with RDD interface</a>.</p>
1532315324
</li>

L06-Data-Processing-with-Spark/L06.1-Structured-Data-Processing-with-Spark.slides.html

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,7 @@
5050
.highlight .m { color: var(--jp-mirror-editor-number-color) } /* Literal.Number */
5151
.highlight .s { color: var(--jp-mirror-editor-string-color) } /* Literal.String */
5252
.highlight .ow { color: var(--jp-mirror-editor-operator-color); font-weight: bold } /* Operator.Word */
53+
.highlight .pm { color: var(--jp-mirror-editor-punctuation-color) } /* Punctuation.Marker */
5354
.highlight .w { color: var(--jp-mirror-editor-variable-color) } /* Text.Whitespace */
5455
.highlight .mb { color: var(--jp-mirror-editor-number-color) } /* Literal.Number.Bin */
5556
.highlight .mf { color: var(--jp-mirror-editor-number-color) } /* Literal.Number.Float */
@@ -14203,7 +14204,7 @@ <h2 id="Start-a-Spark-session">Start a Spark session<a class="anchor-link" href=
1420314204
<div class="CodeMirror cm-s-jupyter">
1420414205
<div class=" highlight hl-ipython3"><pre><span></span><span class="c1">## Only needed when you run spark within Jupyter notebook</span>
1420514206
<span class="kn">import</span> <span class="nn">findspark</span>
14206-
<span class="n">findspark</span><span class="o">.</span><span class="n">init</span><span class="p">(</span><span class="s1">'/usr/lib/spark-current'</span><span class="p">)</span>
14207+
<span class="n">findspark</span><span class="o">.</span><span class="n">init</span><span class="p">()</span>
1420714208
</pre></div>
1420814209

1420914210
</div>

L06-Data-Processing-with-Spark/L06.2-Working-with-Spark-DataFrame.slides.html

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,7 @@
5050
.highlight .m { color: var(--jp-mirror-editor-number-color) } /* Literal.Number */
5151
.highlight .s { color: var(--jp-mirror-editor-string-color) } /* Literal.String */
5252
.highlight .ow { color: var(--jp-mirror-editor-operator-color); font-weight: bold } /* Operator.Word */
53+
.highlight .pm { color: var(--jp-mirror-editor-punctuation-color) } /* Punctuation.Marker */
5354
.highlight .w { color: var(--jp-mirror-editor-variable-color) } /* Text.Whitespace */
5455
.highlight .mb { color: var(--jp-mirror-editor-number-color) } /* Literal.Number.Bin */
5556
.highlight .mf { color: var(--jp-mirror-editor-number-color) } /* Literal.Number.Float */
@@ -14133,9 +14134,9 @@ <h2 id="Start-a-Spark-Session">Start a Spark Session<a class="anchor-link" href=
1413314134
<div class="jp-CodeMirrorEditor jp-Editor jp-InputArea-editor" data-type="inline">
1413414135
<div class="CodeMirror cm-s-jupyter">
1413514136
<div class=" highlight hl-ipython3"><pre><span></span><span class="kn">import</span> <span class="nn">findspark</span>
14136-
<span class="n">findspark</span><span class="o">.</span><span class="n">init</span><span class="p">(</span><span class="s1">'/usr/lib/spark-current'</span><span class="p">)</span>
14137+
<span class="n">findspark</span><span class="o">.</span><span class="n">init</span><span class="p">()</span>
1413714138
<span class="kn">from</span> <span class="nn">pyspark.sql</span> <span class="kn">import</span> <span class="n">SparkSession</span>
14138-
<span class="n">spark</span> <span class="o">=</span> <span class="n">SparkSession</span><span class="o">.</span><span class="n">builder</span><span class="o">.</span><span class="n">master</span><span class="p">(</span><span class="s2">"yarn"</span><span class="p">)</span><span class="o">.</span><span class="n">appName</span><span class="p">(</span><span class="s2">"Python Spark with DataFrame"</span><span class="p">)</span><span class="o">.</span><span class="n">getOrCreate</span><span class="p">()</span> <span class="c1"># .master("local")</span>
14139+
<span class="n">spark</span> <span class="o">=</span> <span class="n">SparkSession</span><span class="o">.</span><span class="n">builder</span><span class="o">.</span><span class="n">appName</span><span class="p">(</span><span class="s2">"Python Spark with DataFrame"</span><span class="p">)</span><span class="o">.</span><span class="n">getOrCreate</span><span class="p">()</span> <span class="c1"># .master("local")</span>
1413914140
<span class="n">spark</span>
1414014141
</pre></div>
1414114142

@@ -14188,7 +14189,7 @@ <h2 id="Start-a-Spark-Session">Start a Spark Session<a class="anchor-link" href=
1418814189

1418914190
</div>
1419014191

14191-
</div>
14192+
</div></div></section></section><section><section>
1419214193
<div class="jp-Cell jp-MarkdownCell jp-Notebook-cell">
1419314194
<div class="jp-Cell-inputWrapper">
1419414195
<div class="jp-Collapser jp-InputCollapser jp-Cell-inputCollapser">
@@ -14199,14 +14200,13 @@ <h2 id="Start-a-Spark-Session">Start a Spark Session<a class="anchor-link" href=
1419914200
<p>If you have problems starting an interactive PySpark session due to system limitations, you can submit your Spark job via the <code>spark-submit</code> command as shown below.</p>
1420014201

1420114202
<pre><code>PYSPARK_PYTHON=python3.7 spark-submit \
14202-
--master yarn \
1420314203
--conf spark.ui.enabled=false \
1420414204
your-pyspark-code.py</code></pre>
1420514205

1420614206
</div>
1420714207
</div>
1420814208
</div>
14209-
</div></div></section></section><section><section>
14209+
</div></section></section><section><section>
1421014210
<div class="jp-Cell jp-MarkdownCell jp-Notebook-cell">
1421114211
<div class="jp-Cell-inputWrapper">
1421214212
<div class="jp-Collapser jp-InputCollapser jp-Cell-inputCollapser">
@@ -14315,7 +14315,7 @@ <h2 id="Read-file-and-infer-the-schema-from-the-header">Read file and infer the
1431514315
<span class="n">StructField</span><span class="p">(</span><span class="s1">'NASDelay'</span><span class="p">,</span> <span class="n">DoubleType</span><span class="p">(),</span> <span class="kc">True</span><span class="p">),</span>
1431614316
<span class="n">StructField</span><span class="p">(</span><span class="s1">'SecurityDelay'</span><span class="p">,</span> <span class="n">DoubleType</span><span class="p">(),</span> <span class="kc">True</span><span class="p">),</span>
1431714317
<span class="n">StructField</span><span class="p">(</span><span class="s1">'LateAircraftDelay'</span><span class="p">,</span> <span class="n">DoubleType</span><span class="p">(),</span> <span class="kc">True</span><span class="p">)</span>
14318-
<span class="p">])</span>
14318+
<span class="p">])</span>
1431914319
</pre></div>
1432014320

1432114321
</div>
@@ -14348,7 +14348,7 @@ <h2 id="Read-file-and-infer-the-schema-from-the-header">Read file and infer the
1434814348
<div class="jp-InputPrompt jp-InputArea-prompt">In&nbsp;[15]:</div>
1434914349
<div class="jp-CodeMirrorEditor jp-Editor jp-InputArea-editor" data-type="inline">
1435014350
<div class="CodeMirror cm-s-jupyter">
14351-
<div class=" highlight hl-ipython3"><pre><span></span>
14351+
<div class=" highlight hl-ipython3"><pre><span></span><span class="n">air</span>
1435214352
</pre></div>
1435314353

1435414354
</div>
@@ -14782,7 +14782,7 @@ <h3 id="Select-columns">Select columns<a class="anchor-link" href="#Select-colum
1478214782
<div class="CodeMirror cm-s-jupyter">
1478314783
<div class=" highlight hl-ipython3"><pre><span></span><span class="c1">## Group and sort</span>
1478414784
<span class="n">aircount</span><span class="o">=</span><span class="n">air</span><span class="o">.</span><span class="n">groupBy</span><span class="p">(</span><span class="s2">"UniqueCarrier"</span><span class="p">)</span><span class="o">.</span><span class="n">count</span><span class="p">()</span>
14785-
<span class="n">aircount</span><span class="o">.</span><span class="n">sort</span><span class="p">(</span><span class="s2">"count"</span><span class="p">,</span><span class="n">ascending</span><span class="o">=</span><span class="kc">False</span><span class="p">)</span><span class="o">.</span><span class="n">show</span><span class="p">()</span>
14785+
<span class="n">aircount</span><span class="o">.</span><span class="n">sort</span><span class="p">(</span><span class="s2">"count"</span><span class="p">,</span> <span class="n">ascending</span><span class="o">=</span><span class="kc">False</span><span class="p">)</span><span class="o">.</span><span class="n">show</span><span class="p">()</span>
1478614786
</pre></div>
1478714787

1478814788
</div>

0 commit comments

Comments
 (0)