Skip to content

Commit 3add13b

Browse files
author
bhasharm7
committed
Airport Problem 1
1 parent 0968134 commit 3add13b

28 files changed

+1067
-375
lines changed

PlayingWithSpark/.idea/.name

-1
This file was deleted.

PlayingWithSpark/.idea/modules/PlayingWithSpark.iml

+185-185
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

PlayingWithSpark/.idea/sbt.xml

+1
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

PlayingWithSpark/.idea/workspace.xml

+440-2
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

PlayingWithSpark/project/target/config-classes/$4bd11ea989eb68f0429e.cache

-1
This file was deleted.

PlayingWithSpark/project/target/streams/$global/update/$global/streams/update_cache_2.12/output

+1-1
Large diffs are not rendered by default.
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
package rdd.airports
2+
3+
import utils.{Context, Utils}
4+
5+
6+
/* Create a Spark program to read the airport data from in/airports.text,
7+
find all the airports whose latitude are bigger than 40.
8+
Then output the airport's name and the airport's latitude to out/airports_by_latitude.text.
9+
Each row of the input file contains the following columns:
10+
Airport ID, Name of airport, Main city served by airport, Country where airport is located, IATA/FAA code,
11+
ICAO Code, Latitude, Longitude, Altitude, Timezone, DST, Timezone in Olson format
12+
Sample output:
13+
"St Anthony", 51.391944
14+
"Tofino", 49.082222
15+
...
16+
*/
17+
/** Reads airport data from in/airports.text, keeps airports whose latitude
  * (column 6) is greater than 40, and writes "name , latitude" lines to
  * out/airports_by_latitude.text, as described in the problem statement above.
  */
object AirportsByLatitudeProblem extends App with Context {

  val airports = sparkSession.sparkContext.textFile("in/airports.text")

  // Split each line once (the original split twice: once in filter, once in map).
  // COMMA_DELIMITER only splits on commas outside double-quoted fields.
  val airportLatAndName = airports
    .map(line => line.split(Utils.COMMA_DELIMITER))
    .filter(columns => columns(6).toFloat > 40)
    .map(columns => columns(1) + " , " + columns(6))

  // The spec comment above names out/airports_by_latitude.text; the original
  // wrote to out/airportLatName.text, contradicting its own documentation.
  airportLatAndName.saveAsTextFile("out/airports_by_latitude.text")

}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,32 @@
1+
package rdd.airports
2+
3+
import org.apache.spark.{SparkConf, SparkContext}
4+
import utils.{Context, Utils}
5+
6+
7+
/* Create a Spark program to read the airport data from in/airports.text,
8+
find all the airports which are located in United States
9+
and output the airport's name and the city's name to out/airports_in_usa.text.
10+
Each row of the input file contains the following columns:
11+
Airport ID, Name of airport, Main city served by airport, Country where airport is located, IATA/FAA code,
12+
ICAO Code, Latitude, Longitude, Altitude, Timezone, DST, Timezone in Olson format
13+
Sample output:
14+
"Putnam County Airport", "Greencastle"
15+
"Dowagiac Municipal Airport", "Dowagiac"
16+
...
17+
*/
18+
/** Reads airport data from in/airports.text, keeps airports located in the
  * United States (column 3), and writes "name,city" lines to
  * out/airports_in_usa.text, as described in the problem statement above.
  */
object AirportsInUSA extends App with Context {

  val airports = sparkSession.sparkContext.textFile("in/airports.text")

  // Split each line once; the original split in filter and again in map.
  // Country field keeps its quotes in the raw data, hence the quoted literal.
  val airportNameAndCity = airports
    .map(line => line.split(Utils.COMMA_DELIMITER))
    .filter(columns => columns(3) == "\"United States\"")
    .map(columns => columns(1) + "," + columns(2))

  // Removed the leftover debug `foreach(println)` action: it triggered an
  // extra full evaluation of the RDD before the save below.
  // The spec comment above names out/airports_in_usa.text; the original wrote
  // to out/airport_in_usa.txt, contradicting its own documentation.
  airportNameAndCity.saveAsTextFile("out/airports_in_usa.text")

}

PlayingWithSpark/src/main/scala/utils/Context.scala

+2-1
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,11 @@
11
package utils
22

3-
import org.apache.spark.SparkConf
43
import org.apache.spark.sql.SparkSession
54

65
trait Context {
76

7+
System.setProperty("hadoop.home.dir", "C:/hadoop")
8+
89
lazy val sparkSession: SparkSession = SparkSession
910
.builder()
1011
.appName("PlayingWithSpark")
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
package utils
2+
3+
/** Shared parsing helpers for the airport exercises. */
object Utils {
  // Splits on commas that lie OUTSIDE double-quoted fields: the lookahead
  // only matches when an even number of quotes remains to the end of line.
  // Raw triple-quoted string — same runtime value as the escaped original.
  val COMMA_DELIMITER: String = """,(?=([^"]*"[^"]*")*[^"]*$)"""
}

PlayingWithSpark/target/.history

+2
Original file line numberDiff line numberDiff line change
@@ -4,3 +4,5 @@ exit
44
exit
55
;set _root_.scala.collection.Seq(shellPrompt := { _ => "" },SettingKey[_root_.scala.Option[_root_.sbt.File]]("sbtStructureOutputFile") in _root_.sbt.Global := _root_.scala.Some(_root_.sbt.file("C:/Users/bhasharm7/AppData/Local/Temp/sbt-structure1.xml")),SettingKey[_root_.java.lang.String]("sbtStructureOptions") in _root_.sbt.Global := "download, resolveClassifiers");apply -cp "C:/Users/bhasharm7/.IdeaIC2018.2/config/plugins/Scala/launcher/sbt-structure-1.0.jar" org.jetbrains.sbt.CreateTasks;*/*:dumpStructure
66
exit
7+
;set _root_.scala.collection.Seq(shellPrompt := { _ => "" },SettingKey[_root_.scala.Option[_root_.sbt.File]]("sbtStructureOutputFile") in _root_.sbt.Global := _root_.scala.Some(_root_.sbt.file("C:/Users/bhasharm7/AppData/Local/Temp/sbt-structure.xml")),SettingKey[_root_.java.lang.String]("sbtStructureOptions") in _root_.sbt.Global := "download, resolveClassifiers");apply -cp "C:/Users/bhasharm7/.IdeaIC2018.2/config/plugins/Scala/launcher/sbt-structure-1.0.jar" org.jetbrains.sbt.CreateTasks;*/*:dumpStructure
8+
exit

PlayingWithSpark/target/scala-2.11/resolution-cache/default/playingwithspark$sources/0.1/resolved.xml.properties

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
#default#playingwithspark$sources;0.1 resolved revisions
2-
#Tue Jun 04 11:58:24 IST 2019
2+
#Tue Jun 04 13:00:33 IST 2019
33
+organisation\:\#@\#\:+org.slf4j\:\#@\#\:+branch\:\#@\#\:+@\#\:NULL\:\#@\:\#@\#\:+module\:\#@\#\:+slf4j-api\:\#@\#\:+revision\:\#@\#\:+1.7.16\:\#@\#\:=1.7.16 release 1.7.16 null
44
+organisation\:\#@\#\:+org.xerial.snappy\:\#@\#\:+branch\:\#@\#\:+@\#\:NULL\:\#@\:\#@\#\:+module\:\#@\#\:+snappy-java\:\#@\#\:+revision\:\#@\#\:+1.1.2.6\:\#@\#\:=1.1.2.6 release 1.1.2.6 null
55
+organisation\:\#@\#\:+org.apache.hadoop\:\#@\#\:+branch\:\#@\#\:+@\#\:NULL\:\#@\:\#@\#\:+module\:\#@\#\:+hadoop-common\:\#@\#\:+revision\:\#@\#\:+2.6.5\:\#@\#\:=2.6.5 release 2.6.5 null

PlayingWithSpark/target/scala-2.11/resolution-cache/default/playingwithspark$sources/0.1/resolved.xml.xml

+1-1
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44
module="playingwithspark$sources"
55
revision="0.1"
66
status="integration"
7-
publication="20190604115814"
7+
publication="20190604125951"
88
/>
99
<configurations>
1010
<conf name="compile" visibility="public" description=""/>

0 commit comments

Comments
 (0)