-
Notifications
You must be signed in to change notification settings - Fork 59
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
187 changed files
with
559 additions
and
11 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
{ "reportRequests":[ { "viewId":"XXXX", "dateRanges":[ { "endDate":"2015-06-30", "startDate":"2015-06-15" }], "dimensions":[ { "name":"ga:browser" }, { "name":"ga:campaign" }], "metrics":[ { "alias":"sessions", "expression":"ga:sessions" }], "pivots":[ { "dimensions":[ { "name":"ga:userAgeBracket" }], "startGroup":"0", "maxGroupCount":"3", "metrics":[ { "alias":"sessions", "expression":"ga:sessions" }, { "alias":"pageviews", "expression":"ga:pageviews" }] }] }]} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,36 @@ | ||
{"group":"A", "ts":1, "value":100} | ||
{"group":"A", "ts":2, "value":101} | ||
{"group":"A", "ts":3, "value":102} | ||
{"group":"A", "ts":4, "value":103} | ||
{"group":"A", "ts":5, "value":104} | ||
{"group":"A", "ts":6, "value":105} | ||
{"group":"A", "ts":7, "value":103} | ||
{"group":"B", "ts":12, "value":101} | ||
{"group":"B", "ts":13, "value":102} | ||
{"group":"B", "ts":14, "value":103} | ||
{"group":"B", "ts":15, "value":104} | ||
{"group":"B", "ts":16, "value":103} | ||
{"group":"B", "ts":17, "value":103} | ||
{"group":"B", "ts":18, "value":102} | ||
{"group":"B", "ts":19, "value":105} | ||
{"group":"A", "ts":8, "value":102} | ||
{"group":"A", "ts":9, "value":105} | ||
{"group":"A", "ts":11, "value":100} | ||
{"group":"A", "ts":12, "value":101} | ||
{"group":"A", "ts":13, "value":102} | ||
{"group":"A", "ts":14, "value":103} | ||
{"group":"A", "ts":15, "value":104} | ||
{"group":"A", "ts":16, "value":105} | ||
{"group":"A", "ts":17, "value":103} | ||
{"group":"A", "ts":18, "value":102} | ||
{"group":"A", "ts":19, "value":105} | ||
{"group":"B", "ts":1, "value":100} | ||
{"group":"B", "ts":2, "value":101} | ||
{"group":"B", "ts":3, "value":102} | ||
{"group":"B", "ts":4, "value":103} | ||
{"group":"B", "ts":5, "value":104} | ||
{"group":"B", "ts":6, "value":105} | ||
{"group":"B", "ts":7, "value":103} | ||
{"group":"B", "ts":8, "value":102} | ||
{"group":"B", "ts":9, "value":105} | ||
{"group":"B", "ts":11, "value":106} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,32 @@ | ||
{"group":"A", "ts":1, "value":100} | ||
{"group":"A", "ts":2, "value":101} | ||
{"group":"A", "ts":3, "value":102} | ||
{"group":"A", "ts":4, "value":103} | ||
{"group":"A", "ts":15, "value":104} | ||
{"group":"A", "ts":16, "value":105} | ||
{"group":"A", "ts":17, "value":103} | ||
{"group":"A", "ts":28, "value":104} | ||
{"group":"A", "ts":29, "value":105} | ||
{"group":"A", "ts":30, "value":103} | ||
{"group":"A", "ts":41, "value":100} | ||
{"group":"A", "ts":42, "value":101} | ||
{"group":"A", "ts":53, "value":102} | ||
{"group":"A", "ts":54, "value":103} | ||
{"group":"A", "ts":55, "value":102} | ||
{"group":"A", "ts":56, "value":103} | ||
{"group":"B", "ts":12, "value":101} | ||
{"group":"B", "ts":13, "value":102} | ||
{"group":"B", "ts":14, "value":103} | ||
{"group":"B", "ts":15, "value":104} | ||
{"group":"B", "ts":16, "value":103} | ||
{"group":"B", "ts":17, "value":103} | ||
{"group":"B", "ts":18, "value":102} | ||
{"group":"B", "ts":19, "value":105} | ||
{"group":"B", "ts":22, "value":101} | ||
{"group":"B", "ts":23, "value":102} | ||
{"group":"B", "ts":34, "value":103} | ||
{"group":"B", "ts":35, "value":104} | ||
{"group":"B", "ts":36, "value":103} | ||
{"group":"B", "ts":37, "value":103} | ||
{"group":"B", "ts":48, "value":102} | ||
{"group":"B", "ts":49, "value":105} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,9 @@ | ||
|
||
# ************************************************************************* | ||
# *** DO NOT TOUCH FILES IN THIS DIRECTORY! *** | ||
# *** FILES IN THIS DIRECTORY AND SUBDIRECTORIES CONSTITUTE A DERBY *** | ||
# *** DATABASE, WHICH INCLUDES THE DATA (USER AND SYSTEM) AND THE *** | ||
# *** FILES NECESSARY FOR DATABASE RECOVERY. *** | ||
# *** EDITING, ADDING, OR DELETING ANY OF THESE FILES MAY CAUSE DATA *** | ||
# *** CORRUPTION AND LEAVE THE DATABASE IN A NON-RECOVERABLE STATE. *** | ||
# ************************************************************************* |
Binary file not shown.
Binary file not shown.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,8 @@ | ||
|
||
# ************************************************************************* | ||
# *** DO NOT TOUCH FILES IN THIS DIRECTORY! *** | ||
# *** FILES IN THIS DIRECTORY ARE USED BY THE DERBY DATABASE RECOVERY *** | ||
# *** SYSTEM. EDITING, ADDING, OR DELETING FILES IN THIS DIRECTORY *** | ||
# *** WILL CAUSE THE DERBY RECOVERY SYSTEM TO FAIL, LEADING TO *** | ||
# *** NON-RECOVERABLE CORRUPT DATABASES. *** | ||
# ************************************************************************* |
Binary file not shown.
Binary file not shown.
Binary file not shown.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,8 @@ | ||
|
||
# ************************************************************************* | ||
# *** DO NOT TOUCH FILES IN THIS DIRECTORY! *** | ||
# *** FILES IN THIS DIRECTORY ARE USED BY THE DERBY DATABASE TO STORE *** | ||
# *** USER AND SYSTEM DATA. EDITING, ADDING, OR DELETING FILES IN THIS *** | ||
# *** DIRECTORY WILL CORRUPT THE ASSOCIATED DERBY DATABASE AND MAKE *** | ||
# *** IT NON-RECOVERABLE. *** | ||
# ************************************************************************* |
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,23 @@ | ||
#/Users/tmalaska/Documents/projects/spark_training/metastore_db | ||
# ******************************************************************** | ||
# *** Please do NOT edit this file. *** | ||
# *** CHANGING THE CONTENT OF THIS FILE MAY CAUSE DATA CORRUPTION. *** | ||
# ******************************************************************** | ||
#Sun Jul 30 14:48:13 PDT 2017 | ||
SysschemasIndex2Identifier=225 | ||
SyscolumnsIdentifier=144 | ||
SysconglomeratesIndex1Identifier=49 | ||
SysconglomeratesIdentifier=32 | ||
SyscolumnsIndex2Identifier=177 | ||
SysschemasIndex1Identifier=209 | ||
SysconglomeratesIndex3Identifier=81 | ||
SystablesIndex2Identifier=129 | ||
SyscolumnsIndex1Identifier=161 | ||
derby.serviceProtocol=org.apache.derby.database.Database | ||
SysschemasIdentifier=192 | ||
derby.storage.propertiesId=16 | ||
SysconglomeratesIndex2Identifier=65 | ||
derby.serviceLocale=en_US | ||
SystablesIdentifier=96 | ||
SystablesIndex1Identifier=113 | ||
#--- last line, don't put anything after this line --- |
Binary file not shown.
Binary file added
BIN
+48 Bytes
...ehouse/google_sample/.part-00000-7ceef0c4-db4d-4629-b639-2590458624e1-c000.gz.parquet.crc
Binary file not shown.
Empty file.
Binary file added
BIN
+4.57 KB
...k-warehouse/google_sample/part-00000-7ceef0c4-db4d-4629-b639-2590458624e1-c000.gz.parquet
Binary file not shown.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
52 changes: 52 additions & 0 deletions
52
src/main/scala/com/malaska/spark/training/timeseries/InfectionPointWindow.scala
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,52 @@ | ||
package com.malaska.spark.training.timeseries | ||
|
||
import org.apache.log4j.{Level, Logger} | ||
import org.apache.spark.sql.SparkSession | ||
|
||
/**
  * Example job that prints moving-window statistics (average, minimum and
  * maximum over the current row and the three preceding rows) for a JSON
  * time series of [[JsonLeadLag]] records.
  *
  * NOTE(review): the object name presumably means "inflection point"; it is
  * left unchanged so existing launch commands keep working.
  */
object InfectionPointWindow {
  // Silence the framework loggers so only the example's own output is printed.
  Logger.getLogger("org").setLevel(Level.OFF)
  Logger.getLogger("akka").setLevel(Level.OFF)

  /**
    * Entry point.
    *
    * @param args `args(0)` is the path of the JSON input file to read
    * @throws IllegalArgumentException if no input path is supplied
    */
  def main(args: Array[String]): Unit = {
    require(args.nonEmpty, "Usage: InfectionPointWindow <leadLagJsonPath>")
    val leadLagJson = args(0)

    // Hard-coded switch: the example always runs against a local master.
    val isLocal = true

    val sparkSession = if (isLocal) {
      SparkSession.builder
        .master("local")
        .appName("my-spark-app")
        .config("spark.some.config.option", "config-value")
        .config("spark.driver.host", "127.0.0.1")
        .config("spark.sql.parquet.compression.codec", "gzip")
        .enableHiveSupport()
        .getOrCreate()
    } else {
      SparkSession.builder
        .appName("my-spark-app")
        .config("spark.some.config.option", "config-value")
        .enableHiveSupport()
        .getOrCreate()
    }
    println("---")

    try {
      import sparkSession.implicits._

      val leadLag = sparkSession.read.json(leadLagJson).as[JsonLeadLag]

      leadLag.createOrReplaceTempView("leadlag")

      sparkSession.sql("select * from leadlag").collect().foreach(println)

      // Each statistic gets its own alias (the three columns previously all
      // shared the name v_moving_avg). The window is partitioned by group, as
      // in LeadLagExample, so rows of different groups that share a ts value
      // are not mixed into one frame in an arbitrary tie order.
      val leadLagDf = sparkSession.sql(
        """SELECT
          |  group, ts,
          |  value AS v_now,
          |  AVG(value) OVER (PARTITION BY group ORDER BY ts ROWS BETWEEN 3 PRECEDING AND CURRENT ROW) AS v_moving_avg,
          |  MIN(value) OVER (PARTITION BY group ORDER BY ts ROWS BETWEEN 3 PRECEDING AND CURRENT ROW) AS v_moving_min,
          |  MAX(value) OVER (PARTITION BY group ORDER BY ts ROWS BETWEEN 3 PRECEDING AND CURRENT ROW) AS v_moving_max
          |FROM leadlag""".stripMargin)

      leadLagDf.collect().foreach(println)
    } finally {
      // Release the Spark context even when reading or querying fails.
      sparkSession.stop()
    }
  }
}
65 changes: 65 additions & 0 deletions
65
src/main/scala/com/malaska/spark/training/timeseries/LeadLagExample.scala
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,65 @@ | ||
package com.malaska.spark.training.timeseries | ||
|
||
import org.apache.log4j.{Level, Logger} | ||
import org.apache.spark.sql.SparkSession | ||
|
||
/**
  * Example job that uses the LEAD and LAG window functions to look at the
  * value after and before each record of a group (ordered by `ts`), then
  * labels each record as a 'peak', a 'valley' or 'n/a'.
  */
object LeadLagExample {
  // Silence the framework loggers so only the example's own output is printed.
  Logger.getLogger("org").setLevel(Level.OFF)
  Logger.getLogger("akka").setLevel(Level.OFF)

  /**
    * Entry point.
    *
    * @param args `args(0)` is the path of the JSON input file to read
    * @throws IllegalArgumentException if no input path is supplied
    */
  def main(args: Array[String]): Unit = {
    require(args.nonEmpty, "Usage: LeadLagExample <leadLagJsonPath>")
    val leadLagJson = args(0)

    // Hard-coded switch: the example always runs against a local master.
    val isLocal = true

    val sparkSession = if (isLocal) {
      SparkSession.builder
        .master("local")
        .appName("my-spark-app")
        .config("spark.some.config.option", "config-value")
        .config("spark.driver.host", "127.0.0.1")
        .config("spark.sql.parquet.compression.codec", "gzip")
        .enableHiveSupport()
        .getOrCreate()
    } else {
      SparkSession.builder
        .appName("my-spark-app")
        .config("spark.some.config.option", "config-value")
        .enableHiveSupport()
        .getOrCreate()
    }
    println("---")

    try {
      import sparkSession.implicits._

      val leadLag = sparkSession.read.json(leadLagJson).as[JsonLeadLag]

      leadLag.createOrReplaceTempView("leadlag")

      sparkSession.sql("select * from leadlag").collect().foreach(println)

      // Stage 1: attach the neighbouring values within each group. LEAD/LAG
      // yield NULL on the last/first row of a group.
      val leadLagDf = sparkSession.sql(
        """SELECT
          |  group, ts,
          |  value AS v_now,
          |  LEAD(value) OVER (PARTITION BY group ORDER BY ts) AS v_after,
          |  LAG(value) OVER (PARTITION BY group ORDER BY ts) AS v_before
          |FROM leadlag""".stripMargin)

      leadLagDf.collect().foreach(println)

      leadLagDf.createOrReplaceTempView("leadlag_stage2")

      leadLagDf.printSchema()

      // Stage 2: a record strictly below both neighbours is a valley, strictly
      // above both is a peak. Comparisons against a NULL neighbour are not
      // true, so the edge rows of each group fall through to 'n/a'.
      sparkSession.sql(
        """SELECT
          |  group, ts, v_now, v_after, v_before,
          |  CASE
          |    WHEN v_now < v_after AND v_now < v_before THEN 'valley'
          |    WHEN v_now > v_after AND v_now > v_before THEN 'peak'
          |    ELSE 'n/a'
          |  END AS inflection
          |FROM leadlag_stage2""".stripMargin).collect().foreach(println)
    } finally {
      // Release the Spark context even when reading or querying fails.
      sparkSession.stop()
    }
  }
}
|
||
/**
  * One record of the JSON time-series input read by the examples in this
  * package.
  *
  * @param group key identifying the series the record belongs to
  * @param ts    ordering timestamp of the record (unit not specified here)
  * @param value measured value at `ts`
  */
case class JsonLeadLag(
  group: String,
  ts: Long,
  value: Long
)
76 changes: 76 additions & 0 deletions
76
src/main/scala/com/malaska/spark/training/timeseries/SessionWindowing.scala
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,76 @@ | ||
package com.malaska.spark.training.timeseries | ||
|
||
import org.apache.log4j.{Level, Logger} | ||
import org.apache.spark.sql.SparkSession | ||
|
||
import scala.collection.mutable | ||
|
||
/**
  * Example job that splits each group's records into sessions: consecutive
  * records (ordered by `ts`) belong to the same session as long as the gap to
  * the previous record does not exceed the configured time gap.
  */
object SessionWindowing {
  // Silence the framework loggers so only the example's own output is printed.
  Logger.getLogger("org").setLevel(Level.OFF)
  Logger.getLogger("akka").setLevel(Level.OFF)

  /**
    * Entry point.
    *
    * @param args `args(0)` is the path of the JSON input file; `args(1)` is
    *             the maximum gap (an integer, in the same unit as `ts`)
    *             allowed between two records of the same session
    * @throws IllegalArgumentException if fewer than two arguments are supplied
    * @throws NumberFormatException    if `args(1)` is not an integer
    */
  def main(args: Array[String]): Unit = {
    require(args.length >= 2, "Usage: SessionWindowing <sessionJsonPath> <timeGap>")
    val sessionJson = args(0)
    val timeGap = args(1).toInt

    // Hard-coded switch: the example always runs against a local master.
    val isLocal = true

    val sparkSession = if (isLocal) {
      SparkSession.builder
        .master("local")
        .appName("my-spark-app")
        .config("spark.some.config.option", "config-value")
        .config("spark.driver.host", "127.0.0.1")
        .config("spark.sql.parquet.compression.codec", "gzip")
        .enableHiveSupport()
        .getOrCreate()
    } else {
      SparkSession.builder
        .appName("my-spark-app")
        .config("spark.some.config.option", "config-value")
        .enableHiveSupport()
        .getOrCreate()
    }
    println("---")

    try {
      import sparkSession.implicits._

      val sessionDs = sparkSession.read.json(sessionJson).as[JsonLeadLag]

      sessionDs.createOrReplaceTempView("session_table")

      sparkSession.sql("select * from session_table").collect().foreach(println)

      // Only the timestamps are needed to derive sessions, so only they are
      // grouped by key.
      val sessionDefinitionRdd = sessionDs.rdd
        .map(record => (record.group, record.ts))
        .groupByKey()
        .flatMap { case (group, timestamps) => sessionize(group, timestamps, timeGap) }

      sessionDefinitionRdd.collect().foreach(println)
    } finally {
      // Release the Spark context even when reading or processing fails.
      sparkSession.stop()
    }
  }

  /**
    * Folds the timestamps of one group into its sessions.
    *
    * A timestamp extends the current session when it is at most `timeGap`
    * after that session's last timestamp; otherwise it opens a new session.
    * Every session is returned, including the last one, and each session's
    * event count covers exactly the timestamps that fall inside it.
    *
    * @param group      key of the group the timestamps belong to
    * @param timestamps timestamps of the group's records, in any order
    * @param timeGap    maximum allowed gap between consecutive timestamps of
    *                   one session
    * @return the group's sessions ordered by start time; empty if
    *         `timestamps` is empty
    */
  private def sessionize(group: String,
                         timestamps: Iterable[Long],
                         timeGap: Long): List[SessionDefinition] =
    timestamps.toSeq.sorted
      .foldLeft(List.empty[SessionDefinition]) {
        // Head of the accumulator is the session currently being extended.
        case (current :: closed, ts) if ts <= current.sessionEnd + timeGap =>
          current.copy(
            sessionEnd = ts,
            sessionLength = ts - current.sessionStart,
            sessionEvents = current.sessionEvents + 1) :: closed
        // First timestamp of the group, or gap exceeded: open a new session.
        case (sessions, ts) =>
          SessionDefinition(group, ts, ts, 0L, 1) :: sessions
      }
      .reverse
}
|
||
/**
  * Summary of one session found for a group.
  *
  * @param group         key of the group the session belongs to
  * @param sessionStart  timestamp of the first event of the session
  * @param sessionEnd    timestamp of the last event of the session
  * @param sessionLength difference between `sessionEnd` and `sessionStart`
  * @param sessionEvents number of events counted for the session
  */
case class SessionDefinition(
  group: String,
  sessionStart: Long,
  sessionEnd: Long,
  sessionLength: Long,
  sessionEvents: Int
)
Oops, something went wrong.