jgperrin · vpostrigan · Jan 20, 2022 · Jan 20, 2022
diff --git a/...ain/java/net/jgp/books/spark/ch03/lab210_schema_introspection/SchemaIntrospectionApp.java b/...ain/java/net/jgp/books/spark/ch03/lab210_schema_introspection/SchemaIntrospectionApp.java
@@ -39,8 +39,9 @@ private void start() {
         .master("local")
         .getOrCreate();
 
-    // Reads a CSV file with header, called books.csv, stores it in a
-    // dataframe
+    // Reads a CSV file with header, called
+    // Restaurants_in_Wake_County_NC.csv,
+    // and stores it in a dataframe
     Dataset<Row> df = spark.read().format("csv")
         .option("header", "true")
         .load("data/Restaurants_in_Wake_County_NC.csv");

diff --git a/...rk/ch03/lab220_json_ingestion_schema_manipulation/JsonIngestionSchemaManipulationApp.java b/...rk/ch03/lab220_json_ingestion_schema_manipulation/JsonIngestionSchemaManipulationApp.java
@@ -60,8 +60,8 @@ private void start() {
         .withColumn("dateEnd", df.col("fields.closing_date"))
         .withColumn("type",
             split(df.col("fields.type_description"), " - ").getItem(1))
-        .withColumn("geoX", df.col("fields.geolocation").getItem(0))
-        .withColumn("geoY", df.col("fields.geolocation").getItem(1));
+        .withColumn("geoX", df.col("fields.geolocation").getItem(1))
+        .withColumn("geoY", df.col("fields.geolocation").getItem(0));
     df = df.withColumn("id",
         concat(df.col("state"), lit("_"),
             df.col("county"), lit("_"),

diff --git a/src/main/java/net/jgp/books/spark/ch03/lab230_dataframe_union/DataframeUnionApp.java b/src/main/java/net/jgp/books/spark/ch03/lab230_dataframe_union/DataframeUnionApp.java
@@ -121,8 +121,8 @@ private Dataset<Row> buildDurhamRestaurantsDataframe() {
         .withColumn("dateEnd", df.col("fields.closing_date"))
         .withColumn("type",
             split(df.col("fields.type_description"), " - ").getItem(1))
-        .withColumn("geoX", df.col("fields.geolocation").getItem(0))
-        .withColumn("geoY", df.col("fields.geolocation").getItem(1))
+        .withColumn("geoX", df.col("fields.geolocation").getItem(1))
+        .withColumn("geoY", df.col("fields.geolocation").getItem(0))
         .drop(df.col("fields"))
         .drop(df.col("geometry"))
         .drop(df.col("record_timestamp"))

diff --git a/...in/python/lab220_json_ingestion_schema_manipulation/jsonIngestionSchemaManipulationApp.py b/...in/python/lab220_json_ingestion_schema_manipulation/jsonIngestionSchemaManipulationApp.py
@@ -41,8 +41,8 @@ def main(spark):
                 .withColumn("dateStart", F.col("fields.opening_date")) \
                 .withColumn("dateEnd", F.col("fields.closing_date")) \
                 .withColumn("type", F.split(F.col("fields.type_description"), " - ").getItem(1)) \
-                .withColumn("geoX", F.col("fields.geolocation").getItem(0)) \
-                .withColumn("geoY", F.col("fields.geolocation").getItem(1))
+                .withColumn("geoX", F.col("fields.geolocation").getItem(1)) \
+                .withColumn("geoY", F.col("fields.geolocation").getItem(0))
 
         df = df.withColumn("id", F.concat(F.col("state"), F.lit("_"),
                                           F.col("county"), F.lit("_"),

diff --git a/src/main/python/lab230_dataframe_union/util.py b/src/main/python/lab230_dataframe_union/util.py
@@ -56,8 +56,8 @@ def build_durham_restaurants_dataframe(df):
             .withColumn("dateStart", F.col("fields.opening_date")) \
             .withColumn("dateEnd", F.col("fields.closing_date")) \
             .withColumn("type", F.split(F.col("fields.type_description"), " - ").getItem(1)) \
-            .withColumn("geoX", F.col("fields.geolocation").getItem(0)) \
-            .withColumn("geoY", F.col("fields.geolocation").getItem(1)) \
+            .withColumn("geoX", F.col("fields.geolocation").getItem(1)) \
+            .withColumn("geoY", F.col("fields.geolocation").getItem(0)) \
             .drop(*drop_cols)
 
     df = df.withColumn("id",

diff --git a/...la/net/jgp/books/spark/ch03/lab210_schema_introspection/SchemaIntrospectionScalaApp.scala b/...la/net/jgp/books/spark/ch03/lab210_schema_introspection/SchemaIntrospectionScalaApp.scala
@@ -22,8 +22,9 @@ object SchemaIntrospectionScalaApp {
     val spark = SparkSession.builder.appName("Schema introspection for restaurants in Wake County, NC")
       .master("local").getOrCreate
 
-    // Reads a CSV file with header, called books.csv, stores it in a
-    // dataframe
+    // Reads a CSV file with header, called
+    // Restaurants_in_Wake_County_NC.csv,
+    // and stores it in a dataframe
     var df = spark.read.format("csv").option("header", "true")
       .load("data/Restaurants_in_Wake_County_NC.csv")
 

diff --git a/...3/lab220_json_ingestion_schema_manipulation/JsonIngestionSchemaManipulationScalaApp.scala b/...3/lab220_json_ingestion_schema_manipulation/JsonIngestionSchemaManipulationScalaApp.scala
@@ -42,8 +42,8 @@ object JsonIngestionSchemaManipulationScalaApp {
           .withColumn("dateStart", col("fields.opening_date"))
           .withColumn("dateEnd", col("fields.closing_date"))
           .withColumn("type", split(col("fields.type_description"), " - ").getItem(1))
-          .withColumn("geoX", col("fields.geolocation").getItem(0))
-          .withColumn("geoY", col("fields.geolocation").getItem(1))
+          .withColumn("geoX", col("fields.geolocation").getItem(1))
+          .withColumn("geoY", col("fields.geolocation").getItem(0))
 
     val cols_list = List(col("state"), lit("_"), col("county"), lit("_"), col("datasetId"))
 

diff --git a/src/main/scala/net/jgp/books/spark/ch03/lab230_dataframe_union/DataframeUnionScalaApp.scala b/src/main/scala/net/jgp/books/spark/ch03/lab230_dataframe_union/DataframeUnionScalaApp.scala
@@ -95,8 +95,8 @@ object DataframeUnionScalaApp {
                 .withColumn("dateStart", col("fields.opening_date"))
                 .withColumn("dateEnd", col("fields.closing_date"))
                 .withColumn("type", split(col("fields.type_description"), " - ").getItem(1))
-                .withColumn("geoX", col("fields.geolocation").getItem(0))
-                .withColumn("geoY", col("fields.geolocation").getItem(1))
+                .withColumn("geoX", col("fields.geolocation").getItem(1))
+                .withColumn("geoY", col("fields.geolocation").getItem(0))
                 .drop(drop_cols:_*)
 
     df1 = df1.withColumn("id",