-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathCommands
35 lines (21 loc) · 962 Bytes
/
Commands
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
// Spark-shell walkthrough: basic DataFrame operations.
// Relies on the `spark` session that spark-shell provides.

// Create a DataFrame from a JSON file.
// Legacy alternative (not needed when `spark` is available):
// val sqlcontext = new org.apache.spark.sql.SQLContext(sc)
val a = spark.read.format("json").load("/home/nikhil/abcd.json")

// Show the data.
a.show()

// Print the schema.
a.printSchema()

// Show only the "id" column.
a.select("id").show()

// Select the name and the id incremented by 1.
a.select(a("name"), a("id") + 1).show()

// Keep only the rows whose id is greater than 10.
a.filter(a("id") > 10).show()

// Count rows per distinct age.
a.groupBy("age").count().show()

// Create a DataFrame from a CSV file, treating the first line as the header.
val t = spark.read.format("csv").option("header", "true").load("/home/nikhil/sql.csv")
/** Schema of one employee record: two string fields, `g` and `name`. */
case class Employee(
  g: String,
  name: String
)
// Build a DataFrame of Employee rows from a comma-separated text file.
// `toDF()` needs the session implicits; spark-shell normally imports them
// already, so this import is only a safeguard.
import spark.implicits._

val op = {
  // `map` is a lazy transformation: nothing is read until an action runs.
  val fields = sc.textFile("/home/nikhil/new.txt").map(_.split(","))
  // NOTE(review): a line with fewer than two comma-separated fields makes
  // e(1) throw ArrayIndexOutOfBoundsException when the job executes.
  fields.map(e => Employee(e(0), e(1))).toDF()
}

// Expose the DataFrame to SQL under the name "employee".
// `createOrReplaceTempView` replaces `registerTempTable`, deprecated since Spark 2.0.
op.createOrReplaceTempView("employee")

// Plain SQL query (not the DataFrame DSL), run through the `spark` session;
// no separate SQLContext has to be created.
val allrecords = spark.sql("SELECT * FROM employee")