Skip to content

Commit 60614c7

Browse files
committed
add metadata
1 parent e42c452 commit 60614c7

File tree

2 files changed

+220
-0
lines changed

2 files changed

+220
-0
lines changed
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,160 @@
1+
package org.apache.spark.sql.catalyst.util
2+
3+
import scala.collection.mutable
4+
5+
import org.json4s._
6+
import org.json4s.jackson.JsonMethods._
7+
8+
import scala.reflect.ClassTag
9+
10+
/**
 * Immutable, serializable key/value metadata container.
 *
 * Entries are kept in an untyped `Map[String, Any]`; the typed getters cast the stored value
 * to the requested type. Looking up a key that is not present fails inside `map(key)`, and
 * asking for a value under the wrong type surfaces as a cast failure at the call site.
 *
 * Two instances are equal when their backing maps are equal, so a value rebuilt from JSON
 * compares equal to the instance it was rendered from.
 *
 * @param map the backing entries, keyed by metadata name
 */
sealed class Metadata private[util] (val map: Map[String, Any]) extends Serializable {

  /** Returns the value stored under `key` as an Int. */
  def getInt(key: String): Int = get(key)

  /** Returns the value stored under `key` as a Double. */
  def getDouble(key: String): Double = get(key)

  /** Returns the value stored under `key` as a Boolean. */
  def getBoolean(key: String): Boolean = get(key)

  /** Returns the value stored under `key` as a String. */
  def getString(key: String): String = get(key)

  /** Returns the value stored under `key` as a nested Metadata. */
  def getMetadata(key: String): Metadata = get(key)

  /** Returns the sequence stored under `key` copied into an Int array. */
  def getIntArray(key: String): Array[Int] = getArray(key)

  /** Returns the sequence stored under `key` copied into a Double array. */
  def getDoubleArray(key: String): Array[Double] = getArray(key)

  /** Returns the sequence stored under `key` copied into a Boolean array. */
  def getBooleanArray(key: String): Array[Boolean] = getArray(key)

  /** Returns the sequence stored under `key` copied into a String array. */
  def getStringArray(key: String): Array[String] = getArray(key)

  /** Returns the sequence stored under `key` copied into a Metadata array. */
  def getMetadataArray(key: String): Array[Metadata] = getArray(key)

  /** Renders this metadata as a compact JSON string. */
  def toJson: String = {
    compact(render(Metadata.toJValue(this)))
  }

  /** Same text as [[toJson]]. */
  override def toString: String = toJson

  /**
   * Structural equality: another Metadata is equal when its backing map is equal to this one.
   * Nested Metadata values and sequences are compared through the map's own element equality.
   */
  override def equals(obj: Any): Boolean = obj match {
    case that: Metadata => (this eq that) || map == that.map
    case _ => false
  }

  /** Hash derived from the backing map so that it stays consistent with `equals`. */
  override def hashCode: Int = map.hashCode

  // The cast to T is erased here; the actual type check happens where the caller uses the result.
  private def get[T](key: String): T = {
    map(key).asInstanceOf[T]
  }

  // Array values are stored as Seq; the ClassTag is needed to allocate the typed result array.
  private def getArray[T: ClassTag](key: String): Array[T] = {
    map(key).asInstanceOf[Seq[T]].toArray
  }
}
46+
47+
/** Factory and JSON conversion helpers for [[Metadata]]. */
object Metadata {

  /** Returns a Metadata with no entries. */
  def empty: Metadata = new Metadata(Map.empty)

  /**
   * Parses a JSON string into a Metadata.
   *
   * The parsed value is cast to `Map[String, Any]`, so the top-level JSON value is expected
   * to be an object.
   *
   * @param json JSON text to parse
   * @throws RuntimeException if a field holds a value or array element of an unsupported type
   */
  def fromJson(json: String): Metadata = {
    val map = parse(json).values.asInstanceOf[Map[String, Any]]
    fromMap(map)
  }

  /**
   * Rebuilds a Metadata from the plain Scala values of a parsed JSON object.
   *
   * The element type of an array is decided by its first element only; an empty array is
   * stored as an empty Int array because no element type can be observed.
   */
  private def fromMap(map: Map[String, Any]): Metadata = {
    val builder = new MetadataBuilder
    map.foreach {
      case (key, value: Int) =>
        builder.putInt(key, value)
      case (key, value: BigInt) =>
        // NOTE(review): BigInt.toInt narrows without a range check - confirm that is intended.
        builder.putInt(key, value.toInt)
      case (key, value: Double) =>
        builder.putDouble(key, value)
      case (key, value: Boolean) =>
        builder.putBoolean(key, value)
      case (key, value: String) =>
        builder.putString(key, value)
      case (key, value: Map[_, _]) =>
        builder.putMetadata(key, fromMap(value.asInstanceOf[Map[String, Any]]))
      case (key, value: Seq[_]) =>
        value.headOption match {
          case None =>
            builder.putIntArray(key, Seq.empty)
          case Some(_: Int) =>
            builder.putIntArray(key, value.asInstanceOf[Seq[Int]])
          case Some(_: BigInt) =>
            builder.putIntArray(key, value.asInstanceOf[Seq[BigInt]].map(_.toInt))
          case Some(_: Double) =>
            builder.putDoubleArray(key, value.asInstanceOf[Seq[Double]])
          case Some(_: Boolean) =>
            builder.putBooleanArray(key, value.asInstanceOf[Seq[Boolean]])
          case Some(_: String) =>
            builder.putStringArray(key, value.asInstanceOf[Seq[String]])
          case Some(_: Map[_, _]) =>
            builder.putMetadataArray(
              key, value.asInstanceOf[Seq[Map[String, Any]]].map(fromMap))
          case Some(other) =>
            throw new RuntimeException(s"Do not support array of type ${typeName(other)}.")
        }
      case (_, other) =>
        // Report the type of the offending value, not of the enclosing (key, value) tuple.
        throw new RuntimeException(s"Do not support type ${typeName(other)}.")
    }
    builder.build()
  }

  /**
   * Converts a supported value (Map, Seq, Int, Double, Boolean, String or Metadata) into the
   * corresponding json4s node, recursing into maps, sequences and nested Metadata.
   */
  private def toJValue(obj: Any): JValue = {
    obj match {
      case map: Map[_, _] =>
        val fields = map.toList.map { case (k: String, v) => (k, toJValue(v)) }
        JObject(fields)
      case arr: Seq[_] =>
        val values = arr.toList.map(toJValue)
        JArray(values)
      case x: Int =>
        JInt(x)
      case x: Double =>
        JDouble(x)
      case x: Boolean =>
        JBool(x)
      case x: String =>
        JString(x)
      case x: Metadata =>
        toJValue(x.map)
      case other =>
        throw new RuntimeException(s"Do not support type ${typeName(other)}.")
    }
  }

  /** Describes the runtime type of `x` for error messages without failing on null. */
  private def typeName(x: Any): String = {
    if (x == null) "null" else x.getClass.toString
  }
}
122+
123+
/**
 * Mutable builder that collects entries and produces a [[Metadata]] from them.
 *
 * Every `put*` method stores the value under the given key and returns this builder, so
 * calls can be chained.
 */
class MetadataBuilder {

  // Entries collected so far; build() copies them into an immutable map.
  private val entries = mutable.Map.empty[String, Any]

  /** Adds every entry of `metadata` to this builder. */
  def withMetadata(metadata: Metadata): this.type = {
    entries ++= metadata.map
    this
  }

  /** Stores an Int under `key`. */
  def putInt(key: String, value: Int): this.type = store(key, value)

  /** Stores a Double under `key`. */
  def putDouble(key: String, value: Double): this.type = store(key, value)

  /** Stores a Boolean under `key`. */
  def putBoolean(key: String, value: Boolean): this.type = store(key, value)

  /** Stores a String under `key`. */
  def putString(key: String, value: String): this.type = store(key, value)

  /** Stores a nested Metadata under `key`. */
  def putMetadata(key: String, value: Metadata): this.type = store(key, value)

  /** Stores a sequence of Int under `key`. */
  def putIntArray(key: String, value: Seq[Int]): this.type = store(key, value)

  /** Stores a sequence of Double under `key`. */
  def putDoubleArray(key: String, value: Seq[Double]): this.type = store(key, value)

  /** Stores a sequence of Boolean under `key`. */
  def putBooleanArray(key: String, value: Seq[Boolean]): this.type = store(key, value)

  /** Stores a sequence of String under `key`. */
  def putStringArray(key: String, value: Seq[String]): this.type = store(key, value)

  /** Stores a sequence of Metadata under `key`. */
  def putMetadataArray(key: String, value: Seq[Metadata]): this.type = store(key, value)

  /** Creates a Metadata from an immutable copy of the entries collected so far. */
  def build(): Metadata = new Metadata(entries.toMap)

  // Single write path shared by all put* methods.
  private def store(key: String, value: Any): this.type = {
    entries(key) = value
    this
  }
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,60 @@
1+
package org.apache.spark.sql.catalyst.util
2+
3+
import org.json4s.jackson.JsonMethods._
4+
import org.scalatest.FunSuite
5+
6+
/** Exercises MetadataBuilder, the typed getters of Metadata, and the JSON round trip. */
class MetadataSuite extends FunSuite {

  // Fixture with a single string entry, merged into `metadata` via withMetadata.
  val baseMetadata = new MetadataBuilder().putString("purpose", "ml").build()

  // Fixture nested into `metadata` under "summary".
  val summary = new MetadataBuilder().putInt("numFeatures", 10).build()

  // Fixture covering one value of each scalar type.
  val age = {
    val b = new MetadataBuilder()
    b.putString("name", "age")
    b.putInt("index", 1)
    b.putBoolean("categorical", false)
    b.putDouble("average", 45.0)
    b.build()
  }

  // Fixture that additionally carries a string array.
  val gender = {
    val b = new MetadataBuilder()
    b.putString("name", "gender")
    b.putInt("index", 5)
    b.putBoolean("categorical", true)
    b.putStringArray("categories", Seq("male", "female"))
    b.build()
  }

  // Composite fixture: merged base entries, a nested Metadata, and one array of each kind.
  val metadata = {
    val b = new MetadataBuilder()
    b.withMetadata(baseMetadata)
    b.putMetadata("summary", summary)
    b.putIntArray("int[]", Seq(0, 1))
    b.putDoubleArray("double[]", Seq(3.0, 4.0))
    b.putBooleanArray("boolean[]", Seq(true, false))
    b.putMetadataArray("features", Seq(age, gender))
    b.build()
  }

  test("metadata builder and getters") {
    // Scalar getters.
    assert(age.getInt("index") === 1)
    assert(age.getDouble("average") === 45.0)
    assert(age.getBoolean("categorical") === false)
    assert(age.getString("name") === "age")
    assert(metadata.getString("purpose") === "ml")
    assert(metadata.getMetadata("summary") === summary)

    // Array getters, compared as sequences.
    val ints = metadata.getIntArray("int[]").toSeq
    assert(ints === Seq(0, 1))
    val doubles = metadata.getDoubleArray("double[]").toSeq
    assert(doubles === Seq(3.0, 4.0))
    val booleans = metadata.getBooleanArray("boolean[]").toSeq
    assert(booleans === Seq(true, false))
    val categories = gender.getStringArray("categories").toSeq
    assert(categories === Seq("male", "female"))
    val features = metadata.getMetadataArray("features").toSeq
    assert(features === Seq(age, gender))
  }

  test("metadata json conversion") {
    val json = metadata.toJson
    // Parsing fails with an exception if the rendered text is not valid JSON.
    withClue("toJson must produce a valid JSON string") {
      parse(json)
    }
    val roundTripped = Metadata.fromJson(json)
    assert(roundTripped === metadata)
  }
}

0 commit comments

Comments
 (0)