Skip to content

Commit 349c726

Browse files
committed
Merge pull request apache#48 in BDP/apache-spark from netlix/2.1.1-unstable/cl to netflix/2.1.1-unstable
Squashed commit of the following: commit 8053563b1d7f0cd21ea34263ebdf017ba309be90 Merge: b8acef1 1035c8d Author: Abhay <aamin@netflix.com> Date: Wed Mar 20 23:21:19 2019 -0700 Merge remote-tracking branch 'origin/netflix/2.1.1-unstable' into netlix/2.1.1-unstable/cl commit b8acef18a6421bafbe7e7080706493cc143dc124 Merge: a1842a6 93934ec Author: Abhay <aamin@netflix.com> Date: Wed Mar 20 23:09:30 2019 -0700 Merge branch 'netlix/2.1.1-unstable/cl' of ssh://stash.corp.netflix.com:7999/bdp/apache-spark into netlix/2.1.1-unstable/cl commit a1842a69ff5bf706319acc7cf7d4182eb8112bd9 Author: Abhay <aamin@netflix.com> Date: Wed Mar 20 22:49:48 2019 -0700 fix typo for test class name commit 401024f6e9a41e885d1ead816db66e6c870c07b7 Author: Abhay <aamin@netflix.com> Date: Mon Mar 18 12:00:12 2019 -0700 add cl_snapshot_extract udf for spark native commit 93934ecc9be6eafeebd7b0bd85124725eb77d831 Author: Abhay <aamin@netflix.com> Date: Wed Mar 20 22:49:48 2019 -0700 fix typo for test class name commit 45d51c59bbe5250feff325c3540bfab899b83174 Author: Abhay <aamin@netflix.com> Date: Mon Mar 18 12:00:12 2019 -0700 add cl_snapshot_extract udf for spark native
1 parent 1035c8d commit 349c726

File tree

7 files changed

+384
-0
lines changed

7 files changed

+384
-0
lines changed

python/pyspark/sql/functions.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1464,6 +1464,14 @@ def nf_quarter(input):
14641464
sc = SparkContext._active_spark_context
14651465
return Column(sc._jvm.functions.nf_quarter(_to_java_column(input)))
14661466

1467+
@since(2.1)
def cl_snapshot_extract(input1, input2, input3, input4):
    """
    Extracts desired values from cl snapshot based on clType, extractCriteria and filterCriteria.
    Refer http://go/cludfs

    :param input1: the cl snapshot column; it is converted with ``_to_java_column``
        before being handed to the JVM function.
    :param input2: forwarded to the JVM function unchanged
        (presumably the clType -- confirm against the JVM signature).
    :param input3: forwarded to the JVM function unchanged
        (presumably the extractCriteria -- confirm against the JVM signature).
    :param input4: forwarded to the JVM function unchanged
        (presumably the filterCriteria -- confirm against the JVM signature).
    :return: a :class:`Column` wrapping the result of the JVM
        ``functions.cl_snapshot_extract`` call.
    """
    # Resolve the JVM-side function through the active SparkContext first,
    # then convert only the snapshot argument to a Java column.
    jvm_extract = SparkContext._active_spark_context._jvm.functions.cl_snapshot_extract
    return Column(jvm_extract(_to_java_column(input1), input2, input3, input4))
14671475

14681476
# ---------------------------- misc functions ----------------------------------
14691477

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -386,6 +386,9 @@ object FunctionRegistry {
386386
expression[NfJsonExtract]("nf_json_extract"),
387387
expression[NfJsonExtractArray]("nf_json_extract_array"),
388388

389+
// Cl functions
390+
expression[ClSnapshotExtract]("cl_snapshot_extract"),
391+
389392
// collection functions
390393
expression[CreateArray]("array"),
391394
expression[ArrayContains]("array_contains"),
Lines changed: 75 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,75 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one or more
3+
* contributor license agreements. See the NOTICE file distributed with
4+
* this work for additional information regarding copyright ownership.
5+
* The ASF licenses this file to You under the Apache License, Version 2.0
6+
* (the "License"); you may not use this file except in compliance with
7+
* the License. You may obtain a copy of the License at
8+
*
9+
* http://www.apache.org/licenses/LICENSE-2.0
10+
*
11+
* Unless required by applicable law or agreed to in writing, software
12+
* distributed under the License is distributed on an "AS IS" BASIS,
13+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+
* See the License for the specific language governing permissions and
15+
* limitations under the License.
16+
*/
17+
18+
package org.apache.spark.sql.catalyst.expressions
19+
import java.util.ArrayList
20+
21+
import scala.collection.JavaConverters._
22+
import scala.collection.mutable.ArrayBuffer
23+
24+
import org.apache.spark.sql.catalyst.InternalRow
25+
import org.apache.spark.sql.catalyst.expressions.codegen.CodegenFallback
26+
import org.apache.spark.sql.catalyst.util.ArrayData
27+
import org.apache.spark.sql.catalyst.util.NetflixClUtils._
28+
import org.apache.spark.sql.catalyst.util.NetflixJsonUtils._
29+
import org.apache.spark.sql.types.{
30+
AbstractDataType,
31+
ArrayType,
32+
DataType,
33+
StringType
34+
}
35+
import org.apache.spark.unsafe.types.UTF8String
36+
37+
/**
 * Extracts values from a cl snapshot based on clType, extractCriteria, and filterCriteria
 * and returns them as an array of strings.
 *
 * The heavy lifting is delegated to `snapshotExtract`; every match it returns is rendered
 * with `getJsonAsString`. The expression evaluates to null when `snapshotExtract` yields
 * null (which it does for a null snapshot). A null `clType` is not mapped to null:
 * `snapshotExtract` rejects it with an `IllegalArgumentException`.
 *
 * @param snapshot        expression producing the cl snapshot JSON string
 * @param clType          expression producing the cl type to select
 * @param extractCriteria expression producing the path to extract from each selected entry
 * @param filterCriteria  expression producing an additional filter over the selected entries
 */
@ExpressionDescription(
  usage =
    "_FUNC_(json, clType, extractCriteria, filterCriteria) - " +
      "Extracts values from cl snapshot, http://go/cludfs."
)
case class ClSnapshotExtract(snapshot: Expression,
                             clType: Expression,
                             extractCriteria: Expression,
                             filterCriteria: Expression)
    extends ExpectsInputTypes
    with CodegenFallback {
  override def children: Seq[Expression] =
    Seq(snapshot, clType, extractCriteria, filterCriteria)

  override def inputTypes: Seq[AbstractDataType] =
    Seq(StringType, StringType, StringType, StringType)
  override def dataType: DataType = ArrayType(StringType)
  override def nullable: Boolean = true
  override def prettyName: String = "cl_snapshot_extract"

  override def eval(input: InternalRow): Any = {
    snapshotExtract(input, snapshot, clType, extractCriteria, filterCriteria) match {
      // Propagate null instead of wrapping it into a one-element array, so a null
      // snapshot yields a null result as `nullable = true` advertises.
      case null => null
      // A list of matches: render every element individually.
      case matches: ArrayList[_] =>
        val rendered: Seq[UTF8String] =
          matches.asScala.map(element => getJsonAsString(element))
        ArrayData.toArrayData(rendered.toArray)
      // Any other single value becomes a one-element array.
      case single =>
        ArrayData.toArrayData(Array(getJsonAsString(single)))
    }
  }
}
Lines changed: 88 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,88 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one or more
3+
* contributor license agreements. See the NOTICE file distributed with
4+
* this work for additional information regarding copyright ownership.
5+
* The ASF licenses this file to You under the Apache License, Version 2.0
6+
* (the "License"); you may not use this file except in compliance with
7+
* the License. You may obtain a copy of the License at
8+
*
9+
* http://www.apache.org/licenses/LICENSE-2.0
10+
*
11+
* Unless required by applicable law or agreed to in writing, software
12+
* distributed under the License is distributed on an "AS IS" BASIS,
13+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+
* See the License for the specific language governing permissions and
15+
* limitations under the License.
16+
*/
17+
18+
package org.apache.spark.sql.catalyst.util
19+
20+
import scala.collection.immutable.HashSet
21+
22+
import com.jayway.jsonpath.{JsonPath, JsonPathException}
23+
24+
import org.apache.spark.sql.catalyst.InternalRow
25+
import org.apache.spark.sql.catalyst.expressions.Expression
26+
import org.apache.spark.unsafe.types.UTF8String
27+
28+
/**
 * Helpers backing the `cl_snapshot_extract` expression: translation of the user-facing
 * clType / extractCriteria / filterCriteria arguments into a Jayway JsonPath, and
 * evaluation of that path against a cl snapshot.
 */
object NetflixClUtils {

  // A token that is immediately followed by one of these operators is treated as a field
  // reference and gets the JsonPath current-node prefix `@.`.
  private val relationalOperators = HashSet("==", "!=", ">=", "<=", ">", "<")

  /**
   * Builds the JsonPath used to query a cl snapshot.
   *
   * The result has the shape
   * `$[?("<clType>" in @.type[ && (<filter>)])][.<extractCriteria>]`.
   * Null or empty `extractCriteria` / `filterCriteria` simply omit their part; a null
   * `clType` is rendered as the empty string.
   *
   * @param clType          cl type that must be contained in each entry's `type` array
   * @param extractCriteria path, relative to a selected entry, of the value to extract
   * @param filterCriteria  boolean expression over entry fields, e.g.
   *                        `view=="homeTab" && trackingInfo.surveyResponse==1`
   * @return the generated JsonPath string
   */
  // NOTE(review): clType is concatenated into the path without escaping quotes -- confirm
  // callers never pass untrusted values here.
  def generateClSnapshotJaywayJsonPath(clType: String,
                                       extractCriteria: String,
                                       filterCriteria: String): String = {
    val jsonPathifiedExtractCriteria =
      Option(extractCriteria).filter(_.nonEmpty).map(path => s".$path").getOrElse("")

    val jsonPathifiedFilterCriteria =
      Option(filterCriteria)
        .filter(_.nonEmpty)
        .map { criteria =>
          // Split right after and right before every operator / parenthesis, swallowing
          // the blanks around the split points; operators stay in the list as tokens.
          val tokens = criteria
            .split(
              "((?<===|>|<|>=|<=|!=|&&|\\|\\||\\(|\\)) *|(?===|>|<|>=|<=|!=|&&|\\|\\||\\(|\\))) *")
            .toList
          // Pair each token with its successor. Unlike `sliding(2)`, zipping yields no
          // pair for a single-token filter, so that token is not emitted twice.
          val allButLast = tokens.zip(tokens.drop(1)).map {
            case (token, next) =>
              if (relationalOperators.contains(next)) s"@.${token.trim}" else token.trim
          }
          " && (" + allButLast.mkString + tokens.last.trim + ")"
        }
        .getOrElse("")

    "$[?(\"" + Option(clType).getOrElse("") +
      "\" in @.type" + jsonPathifiedFilterCriteria + ")]" + jsonPathifiedExtractCriteria
  }

  /**
   * Evaluates the argument expressions against `input` and runs the generated JsonPath
   * over the snapshot.
   *
   * @param input           row the expressions are evaluated against
   * @param json            expression producing the snapshot JSON string
   * @param clType          expression producing the cl type; must not evaluate to null
   * @param extractCriteria expression producing the extract path; may evaluate to null
   * @param filterCriteria  expression producing the filter; may evaluate to null
   * @return null when the snapshot evaluates to null, otherwise whatever
   *         `JsonPath.read` returns for the generated path
   * @throws IllegalArgumentException when `clType` evaluates to null
   * @throws JsonPathException        when the JsonPath library rejects the path or document
   */
  @throws(classOf[JsonPathException])
  def snapshotExtract(input: InternalRow,
                      json: Expression,
                      clType: Expression,
                      extractCriteria: Expression,
                      filterCriteria: Expression): Any = {
    // Evaluate against the current row (so non-literal arguments work) and map a null
    // result to a null String instead of failing on `.toString`.
    def evalString(expression: Expression): String =
      Option(expression.eval(input).asInstanceOf[UTF8String]).map(_.toString).orNull

    val snapshot = evalString(json)
    if (snapshot == null) {
      null
    } else {
      val clTypeValue = evalString(clType)
      if (clTypeValue == null) {
        throw new IllegalArgumentException(
          "`cl_snapshot_extract` must be supplied with " +
            "a valid `clType` from http://go/cl, refer http://go/cludfs")
      }

      val jsonPath = generateClSnapshotJaywayJsonPath(
        clTypeValue,
        evalString(extractCriteria),
        evalString(filterCriteria)
      )
      JsonPath.parse(snapshot).read[Any](jsonPath)
    }
  }
}
Lines changed: 140 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,140 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one or more
3+
* contributor license agreements. See the NOTICE file distributed with
4+
* this work for additional information regarding copyright ownership.
5+
* The ASF licenses this file to You under the Apache License, Version 2.0
6+
* (the "License"); you may not use this file except in compliance with
7+
* the License. You may obtain a copy of the License at
8+
*
9+
* http://www.apache.org/licenses/LICENSE-2.0
10+
*
11+
* Unless required by applicable law or agreed to in writing, software
12+
* distributed under the License is distributed on an "AS IS" BASIS,
13+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+
* See the License for the specific language governing permissions and
15+
* limitations under the License.
16+
*/
17+
18+
package org.apache.spark.sql.catalyst.expressions
19+
20+
import org.apache.spark.SparkFunSuite
21+
22+
/**
 * Evaluation tests for the `ClSnapshotExtract` expression against a fixed cl snapshot.
 */
// NOTE(review): the class name ends in "Suit"; it probably should be "Suite" -- left
// unchanged here because renaming also affects the file name and test discovery.
class NetflixClExpressionsSuit extends SparkFunSuite with ExpressionEvalHelper {

  // Snapshot fixture: a JSON array of cl entries, collapsed onto a single line.
  private val snapshot =
    """
      |[{"sequence":1,"id":757294327146741,"source":"iOS","schema":{"name":"App","version":"1.26.0"},"type":["Log","Session"],"time":559440041005},
      |{"type":["ProcessState"],"id":757294397275504,"computation":"none","allocation":"none","interaction":"none"},
      |{"type":["ProcessState"],"id":757294425125683,"computation":"normal","allocation":"normal","interaction":"direct"},
      |{"sequence":3,"id":757294439218544,"type":["ProcessStateTransition","Action","Session"],"time":559440041006},
      |{"nfvdid":"blahblablb","id":757294485523660,"sequence":4,"type":["VisitorDeviceId","AccountIdentity","Session"],"time":559440041008},
      |{"sequence":5,"id":757294513373839,"type":["UserInteraction","Session"],"time":559440041008},
      |{"model":"APPLE_iPhone9-1","type":["Device"],"id":757294562698854},
      |{"type":["OsVersion"],"osVersion":"11.4.1","id":757294575785082},
      |{"appVersion":"11.2.0","type":["AppVersion"],"id":757294615379312},
      |{"uiVersion":"ios11.2.0 (2265)","type":["UiVersion"],"id":757294643565035},
      |{"esn":"","type":["Esn"],"id":757294732484280},
      |{"type":["NrdLib"],"id":757294743557242,"appVersion":"11.2.0 (2265)","sdkVersion":"2012.4","libVersion":"2012.4"},
      |{"userAgent":" App/11.2.0 ( iOS 11.4.1 )","type":["UserAgent"],"id":757294786171371},
      |{"utcOffset":-18000,"type":["TimeZone"],"id":757294829121044},
      |{"muting":false,"id":757294867037552,"level":0.875,"type":["Volume"]},
      |{"type":["WifiConnection","NetworkConnection"],"id":757294904954060},
      |{"type":["UiLocale"],"uiLocale":"en","id":757294954950164},
      |{"cells":{"7972":1,"10953":4},"type":["TestAllocations"],"id":757294995551027},
      |{"trackingInfo":{"videoId":12345,"trackId":9087,"imageKey":"test1"},"sequence":9,"id":757295011213817,"view":"browseTitles","type":["Presentation","Session"],"time":559440043683},
      |{"trackingInfo":{"surveyResponse":1,"surveyIdentifier":"IO_80203147"},"sequence":11,"id":757295111313817,"view":"homeTab","type":["Focus","Session"],"time":559440043683}]
    """.stripMargin.replace("\n", "")

  test("function with only `clType`") {
    val result = List(
      "{\"id\":757295111313817,\"sequence\":11,\"time\":559440043683,\"trackingInfo\":{\"surveyIdentifier\":\"IO_80203147\",\"surveyResponse\":1},\"type\":[\"Focus\",\"Session\"],\"view\":\"homeTab\"}"
    )
    checkEvaluation(ClSnapshotExtract(Literal(snapshot),
                                      Literal("Focus"),
                                      Literal(""),
                                      Literal("")),
                    result)
    val result1 = List(
      "{\"id\":757294327146741,\"schema\":{\"name\":\"App\",\"version\":\"1.26.0\"},\"sequence\":1,\"source\":\"iOS\",\"time\":559440041005,\"type\":[\"Log\",\"Session\"]}",
      "{\"id\":757294439218544,\"sequence\":3,\"time\":559440041006,\"type\":[\"ProcessStateTransition\",\"Action\",\"Session\"]}",
      "{\"id\":757294485523660,\"nfvdid\":\"blahblablb\",\"sequence\":4,\"time\":559440041008,\"type\":[\"VisitorDeviceId\",\"AccountIdentity\",\"Session\"]}",
      "{\"id\":757294513373839,\"sequence\":5,\"time\":559440041008,\"type\":[\"UserInteraction\",\"Session\"]}",
      "{\"id\":757295011213817,\"sequence\":9,\"time\":559440043683,\"trackingInfo\":{\"imageKey\":\"test1\",\"trackId\":9087,\"videoId\":12345},\"type\":[\"Presentation\",\"Session\"],\"view\":\"browseTitles\"}",
      "{\"id\":757295111313817,\"sequence\":11,\"time\":559440043683,\"trackingInfo\":{\"surveyIdentifier\":\"IO_80203147\",\"surveyResponse\":1},\"type\":[\"Focus\",\"Session\"],\"view\":\"homeTab\"}"
    )
    checkEvaluation(ClSnapshotExtract(Literal(snapshot),
                                      Literal("Session"),
                                      Literal(""),
                                      Literal("")),
                    result1)
  }

  test("function with `clType` and `extractCriteria`") {
    val result = List("\"homeTab\"")
    checkEvaluation(ClSnapshotExtract(Literal(snapshot),
                                      Literal("Focus"),
                                      Literal("view"),
                                      Literal("")),
                    result)
    val result1 = List("1")
    checkEvaluation(ClSnapshotExtract(Literal(snapshot),
                                      Literal("Focus"),
                                      Literal("trackingInfo.surveyResponse"),
                                      Literal("")),
                    result1)
  }

  test("function with all the arguments present") {
    val result =
      List("{\"surveyIdentifier\":\"IO_80203147\",\"surveyResponse\":1}")
    checkEvaluation(ClSnapshotExtract(Literal(snapshot),
                                      Literal("Focus"),
                                      Literal("trackingInfo"),
                                      Literal("view==\"homeTab\"")),
                    result)
    val result1 = List("\"IO_80203147\"")
    checkEvaluation(
      ClSnapshotExtract(
        Literal(snapshot),
        Literal("Focus"),
        Literal("trackingInfo.surveyIdentifier"),
        Literal("view==\"homeTab\" && trackingInfo.surveyResponse==1")),
      result1
    )
    val result3 = List()
    checkEvaluation(
      ClSnapshotExtract(
        Literal(snapshot),
        Literal("Focus"),
        Literal("trackingInfo.surveyIdentifier"),
        Literal("view==\"browseTitles\" && trackingInfo.surveyResponse==1")),
      result3
    )
    val result4 = List("\"test1\"")
    checkEvaluation(
      ClSnapshotExtract(
        Literal(snapshot),
        Literal("Presentation"),
        Literal("trackingInfo.imageKey"),
        Literal(
          "view==\"browseTitles\" && ( trackingInfo.trackId==9087 || trackingInfo.videoId == 12345 )")
      ),
      result4
    )
  }

  test("function without `clType` parameter") {
    val expression = ClSnapshotExtract(
      Literal(snapshot),
      Literal(null),
      Literal("trackingInfo.imageKey"),
      Literal(
        "view==\"browseTitles\" && ( trackingInfo.trackId==9087 || trackingInfo.videoId == 12345 )")
    )
    // Constructing the expression never throws; the null `clType` is only rejected on
    // evaluation, so evaluate it and require the exception instead of silently passing.
    intercept[IllegalArgumentException] {
      expression.eval(EmptyRow)
    }
  }

}
Lines changed: 66 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,66 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one or more
3+
* contributor license agreements. See the NOTICE file distributed with
4+
* this work for additional information regarding copyright ownership.
5+
* The ASF licenses this file to You under the Apache License, Version 2.0
6+
* (the "License"); you may not use this file except in compliance with
7+
* the License. You may obtain a copy of the License at
8+
*
9+
* http://www.apache.org/licenses/LICENSE-2.0
10+
*
11+
* Unless required by applicable law or agreed to in writing, software
12+
* distributed under the License is distributed on an "AS IS" BASIS,
13+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+
* See the License for the specific language governing permissions and
15+
* limitations under the License.
16+
*/
17+
18+
package org.apache.spark.sql.catalyst.util
19+
20+
import org.apache.spark.SparkFunSuite
21+
import org.apache.spark.sql.catalyst.expressions.ExpressionEvalHelper
22+
23+
/**
 * Tests for the JsonPath strings produced by
 * `NetflixClUtils.generateClSnapshotJaywayJsonPath`.
 *
 * `assertResult` takes the expected value in its first parameter list and the actual
 * value in the second, so the literal path comes first in every check below.
 */
class NetflixClUtilsSuite extends SparkFunSuite with ExpressionEvalHelper {
  test("jsonpath with only `clType`") {
    assertResult("$[?(\"Focus\" in @.type)]")(
      NetflixClUtils.generateClSnapshotJaywayJsonPath("Focus", "", ""))
    assertResult("$[?(\"Session\" in @.type)]")(
      NetflixClUtils.generateClSnapshotJaywayJsonPath("Session", "", ""))
  }

  test("jsonpath with `clType` and `extractCriteria`") {
    assertResult("$[?(\"Focus\" in @.type)].view")(
      NetflixClUtils.generateClSnapshotJaywayJsonPath("Focus", "view", null))
    assertResult("$[?(\"Focus\" in @.type)].trackingInfo.surveyResponse")(
      NetflixClUtils.generateClSnapshotJaywayJsonPath(
        "Focus",
        "trackingInfo.surveyResponse",
        null))
  }

  test("jsonpath with all arguments") {
    assertResult(
      "$[?(\"Focus\" in @.type && (@.view==\"homeTab\"))].trackingInfo"
    )(
      NetflixClUtils.generateClSnapshotJaywayJsonPath("Focus",
                                                      "trackingInfo",
                                                      "view==\"homeTab\""))
    assertResult(
      "$[?(\"Focus\" in @.type && (@.view==\"homeTab\"&&@.trackingInfo.surveyResponse==1))].trackingInfo.surveyIdentifier"
    )(
      NetflixClUtils.generateClSnapshotJaywayJsonPath(
        "Focus",
        "trackingInfo.surveyIdentifier",
        "view==\"homeTab\" && trackingInfo.surveyResponse==1"))
    assertResult(
      "$[?(\"Presentation\" in @.type && (@.view==\"browseTitles\"&&(@.trackingInfo.trackId==9087||@.trackingInfo.videoId==12345)))].trackingInfo.imageKey"
    )(
      NetflixClUtils.generateClSnapshotJaywayJsonPath(
        "Presentation",
        "trackingInfo.imageKey",
        "view==\"browseTitles\" && ( trackingInfo.trackId==9087 || trackingInfo.videoId == 12345 )"))
  }
}

0 commit comments

Comments
 (0)