Add read local file option

feng-li · feng-li · commit c64bd2e10c13 · 2024-04-16T12:30:39.000+08:00
diff --git a/L06-Data-Processing-with-Spark/L06.1-Structured-Data-Processing-with-Spark.ipynb b/L06-Data-Processing-with-Spark/L06.1-Structured-Data-Processing-with-Spark.ipynb
@@ -390,7 +390,25 @@
     }
    ],
    "source": [
-    "sdf = spark.read.csv(\"data/people.txt\") \n",
+    "sdf = spark.read.csv(\"data/people.txt\") # read hdfs file \n",
+    "sdf.show() # Displays the content of the DataFrame to stdout"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "slideshow": {
+     "slide_type": "fragment"
+    }
+   },
+   "outputs": [],
+   "source": [
+    "# If your data are available locally, you can say so explicitly with the \"file://\" prefix.\n",
+    "# For this to work, a copy of the file must exist on every worker, or\n",
+    "# every worker needs access to a common shared drive, such as an NFS mount.\n",
+    "\n",
+    "sdf = spark.read.csv(\"file:///home/fli/data/people.txt\") # read local file \n",
     "sdf.show() # Displays the content of the DataFrame to stdout"
    ]
   },