Skip to content

Commit 9f22f08

Browse files
union example
1 parent 882ed6c commit 9f22f08

File tree

2 files changed

+47
-5
lines changed

2 files changed

+47
-5
lines changed
Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,42 @@
1+
package com.sparkbyexamples.spark.dataframe
2+
3+
import org.apache.spark.sql.SparkSession
4+
5+
/**
 * Demonstrates combining two DataFrames that share the same schema with
 * `union` and `unionAll`, and removing the resulting duplicate rows with
 * `distinct`.
 *
 * The entry point is an explicit `main` method rather than `extends App`:
 * the Spark documentation notes that subclasses of `scala.App` may not work
 * correctly and that applications should define `main()` instead.
 */
object UnionExample {

  /** Column names shared by both sample DataFrames. */
  private val Columns: Seq[String] =
    Seq("employee_name", "department", "state", "salary", "age", "bonus")

  /**
   * Builds two small employee DataFrames, prints them, then prints their
   * union, the de-duplicated union, and the result of `unionAll`.
   *
   * @param args command-line arguments (unused)
   */
  def main(args: Array[String]): Unit = {
    val spark: SparkSession = SparkSession.builder()
      .master("local[1]")
      .appName("SparkByExamples.com")
      .getOrCreate()

    try {
      spark.sparkContext.setLogLevel("ERROR")

      // Needed for the Seq-of-tuples -> DataFrame conversion (`toDF`).
      import spark.implicits._

      val simpleData = Seq(
        ("James", "Sales", "NY", 90000, 34, 10000),
        ("Michael", "Sales", "NY", 86000, 56, 20000),
        ("Robert", "Sales", "CA", 81000, 30, 23000),
        ("Maria", "Finance", "CA", 90000, 24, 23000)
      )
      val df = simpleData.toDF(Columns: _*)
      df.printSchema()
      df.show()

      // "James" and "Maria" are repeated here on purpose so that the union
      // below contains duplicate rows.
      val simpleData2 = Seq(
        ("James", "Sales", "NY", 90000, 34, 10000),
        ("Maria", "Finance", "CA", 90000, 24, 23000),
        ("Jen", "Finance", "NY", 79000, 53, 15000),
        ("Jeff", "Marketing", "CA", 80000, 25, 18000),
        ("Kumar", "Marketing", "NY", 91000, 50, 21000)
      )
      val df2 = simpleData2.toDF(Columns: _*)
      df2.show(false)

      // Show the combined rows first, then the same rows with duplicates
      // removed via distinct().
      val df3 = df.union(df2)
      df3.show(false)
      df3.distinct().show(false)

      // unionAll is kept because this example exists to show it next to
      // union. NOTE(review): the Dataset API documents it as an alias for
      // union; confirm its deprecation status for the Spark version in use.
      val df4 = df.unionAll(df2)
      df4.show(false)
    } finally {
      // Release the local Spark context even if one of the steps above fails.
      spark.stop()
    }
  }
}

src/main/scala/com/sparkbyexamples/spark/dataframe/WhereExample.scala

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -37,23 +37,23 @@ object WhereExample extends App{
3737
df.show()
3838

3939
//Condition
40-
df.filter(df("state") === "OH")
40+
df.where(df("state") === "OH")
4141
.show(false)
4242

4343
//SQL Expression
44-
df.filter("gender == 'M'")
44+
df.where("gender == 'M'")
4545
.show(false)
4646

4747
//multiple condition
48-
df.filter(df("state") === "OH" && df("gender") === "M")
48+
df.where(df("state") === "OH" && df("gender") === "M")
4949
.show(false)
5050

5151
//Array condition
52-
df.filter(array_contains(df("languages"),"Java"))
52+
df.where(array_contains(df("languages"),"Java"))
5353
.show(false)
5454

5555
//Struct condition
56-
df.filter(df("name.lastname") === "Williams")
56+
df.where(df("name.lastname") === "Williams")
5757
.show(false)
5858

5959
}

0 commit comments

Comments
 (0)