Skip to content

Commit c7f5b86

Browse files
Handling Null values
1 parent cf71f75 commit c7f5b86

File tree

3 files changed

+97
-2
lines changed

3 files changed

+97
-2
lines changed

src/main/scala/com/sparkbyexamples/spark/dataframe/FilterExample.scala

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -39,15 +39,15 @@ object FilterExample extends App{
3939
df.filter(df("state") === "OH")
4040
.show(false)
4141

42-
//Expression
42+
//SQL Expression
4343
df.filter("gender == 'M'")
4444
.show(false)
4545

4646
//multiple condition
4747
df.filter(df("state") === "OH" && df("gender") === "M")
4848
.show(false)
4949

50-
//Row condition
50+
//Array condition
5151
df.filter(array_contains(df("languages"),"Java"))
5252
.show(false)
5353

src/main/scala/com/sparkbyexamples/spark/dataframe/HandleNullExample.scala

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,36 @@
1+
package com.sparkbyexamples.spark.dataframe
2+
3+
import org.apache.spark.sql.SparkSession
4+
5+
/**
 * Reads a zipcode CSV and prints it several times, each time with null values
 * replaced through `DataFrame.na.fill`.
 *
 * NOTE(review): the column names used below ("population", "city", "type") are
 * presumably present in small_zipcode.csv -- confirm against the resource file.
 */
object HandleNullExample extends App {

  // Single-threaded local session for running the example.
  val spark: SparkSession =
    SparkSession.builder().master("local[1]").appName("SparkByExamples.com").getOrCreate()

  val filePath = "src/main/resources/small_zipcode.csv"

  // First line of the file is the header; column types are inferred from the data.
  val df = spark.read
    .option("inferSchema", "true")
    .option("delimiter", ",")
    .option("header", "true")
    .csv(filePath)

  df.printSchema()
  df.show(truncate = false)

  // Numeric fill value: nulls in numeric columns become 0.
  df.na.fill(0).show(truncate = false)

  // Same replacement, restricted to the "population" column.
  df.na.fill(0, Array("population")).show(truncate = false)

  // String fill value: nulls in string columns become the empty string.
  df.na.fill("").show(truncate = false)

  // Per-column defaults: "unknown" for city, empty string for type.
  df.na
    .fill("unknown", Array("city"))
    .na
    .fill("", Array("type"))
    .show(truncate = false)

  // Array and map columns

}
src/main/scala/com/sparkbyexamples/spark/dataframe/WhereExample.scala

Lines changed: 59 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,59 @@
1+
package com.sparkbyexamples.spark.dataframe
2+
3+
import org.apache.spark.sql.functions.array_contains
4+
import org.apache.spark.sql.types.{ArrayType, StringType, StructType}
5+
import org.apache.spark.sql.{Row, SparkSession}
6+
7+
/**
 * Demonstrates `DataFrame.where` with five kinds of predicate: a column
 * condition, a SQL expression string, a compound condition, an array-membership
 * test and a nested struct field condition.
 *
 * `where` is an alias for `filter`, so each call below selects the same rows
 * the equivalent `filter` call would.
 */
object WhereExample extends App {

  val spark: SparkSession = SparkSession.builder()
    .master("local[1]")
    .appName("SparkByExamples.com")
    .getOrCreate()

  // Keep Spark's own logging out of the way of the printed tables.
  spark.sparkContext.setLogLevel("ERROR")

  // Each row: name struct (first, middle, last), languages, state, gender.
  val arrayStructureData = Seq(
    Row(Row("James", "", "Smith"), List("Java", "Scala", "C++"), "OH", "M"),
    Row(Row("Anna", "Rose", ""), List("Spark", "Java", "C++"), "NY", "F"),
    Row(Row("Julia", "", "Williams"), List("CSharp", "VB"), "OH", "F"),
    Row(Row("Maria", "Anne", "Jones"), List("CSharp", "VB"), "NY", "M"),
    Row(Row("Jen", "Mary", "Brown"), List("CSharp", "VB"), "NY", "M"),
    Row(Row("Mike", "Mary", "Williams"), List("Python", "VB"), "OH", "M")
  )

  // Schema matching the rows above: a nested name struct, a string array
  // and two plain string columns.
  val arrayStructureSchema = new StructType()
    .add("name", new StructType()
      .add("firstname", StringType)
      .add("middlename", StringType)
      .add("lastname", StringType))
    .add("languages", ArrayType(StringType))
    .add("state", StringType)
    .add("gender", StringType)

  val df = spark.createDataFrame(
    spark.sparkContext.parallelize(arrayStructureData), arrayStructureSchema)
  df.printSchema()
  df.show()

  //Condition
  df.where(df("state") === "OH")
    .show(false)

  //SQL Expression
  df.where("gender == 'M'")
    .show(false)

  //multiple condition
  df.where(df("state") === "OH" && df("gender") === "M")
    .show(false)

  //Array condition
  df.where(array_contains(df("languages"), "Java"))
    .show(false)

  //Struct condition: dotted path reaches into the nested name struct
  df.where(df("name.lastname") === "Williams")
    .show(false)

}

0 commit comments

Comments
 (0)