Skip to content

Commit aeb7801

Browse files
avoid multiple json generator created
1 parent 4396dfb commit aeb7801

File tree

3 files changed

+29
-16
lines changed

3 files changed

+29
-16
lines changed

sql/core/src/main/scala/org/apache/spark/sql/DataFrameImpl.scala

Lines changed: 22 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,8 @@
1717

1818
package org.apache.spark.sql
1919

20+
import java.io.CharArrayWriter
21+
2022
import scala.language.implicitConversions
2123
import scala.reflect.ClassTag
2224
import scala.collection.JavaConversions._
@@ -409,8 +411,26 @@ private[sql] class DataFrameImpl protected[sql](
409411
override def toJSON: RDD[String] = {
410412
val rowSchema = this.schema
411413
this.mapPartitions { iter =>
412-
val jsonFactory = new JsonFactory()
413-
iter.map(JsonRDD.rowToJSON(rowSchema, jsonFactory))
414+
val writer = new CharArrayWriter()
415+
// create the Generator without separator inserted between 2 records
416+
val gen = new JsonFactory().createGenerator(writer).setRootValueSeparator(null)
417+
418+
new Iterator[String] {
419+
override def hasNext() = iter.hasNext
420+
override def next(): String = {
421+
JsonRDD.rowToJSON(rowSchema, gen)(iter.next())
422+
gen.flush()
423+
424+
val json = writer.toString
425+
if (hasNext) {
426+
writer.reset()
427+
} else {
428+
gen.close()
429+
}
430+
431+
json
432+
}
433+
}
414434
}
415435
}
416436

sql/core/src/main/scala/org/apache/spark/sql/json/JsonRDD.scala

Lines changed: 3 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -23,8 +23,7 @@ import java.sql.{Date, Timestamp}
2323
import scala.collection.Map
2424
import scala.collection.convert.Wrappers.{JMapWrapper, JListWrapper}
2525

26-
import com.fasterxml.jackson.core.JsonProcessingException
27-
import com.fasterxml.jackson.core.JsonFactory
26+
import com.fasterxml.jackson.core.{JsonGenerator, JsonProcessingException, JsonFactory}
2827
import com.fasterxml.jackson.databind.ObjectMapper
2928

3029
import org.apache.spark.rdd.RDD
@@ -430,14 +429,11 @@ private[sql] object JsonRDD extends Logging {
430429

431430
/** Transforms a single Row to JSON using Jackson
432431
*
433-
* @param jsonFactory a JsonFactory object to construct a JsonGenerator
434432
* @param rowSchema the schema object used for conversion
433+
* @param gen a JsonGenerator object
435434
* @param row The row to convert
436435
*/
437-
private[sql] def rowToJSON(rowSchema: StructType, jsonFactory: JsonFactory)(row: Row): String = {
438-
val writer = new StringWriter()
439-
val gen = jsonFactory.createGenerator(writer)
440-
436+
private[sql] def rowToJSON(rowSchema: StructType, gen: JsonGenerator)(row: Row) = {
441437
def valWriter: (DataType, Any) => Unit = {
442438
case (_, null) | (NullType, _) => gen.writeNull()
443439
case (StringType, v: String) => gen.writeString(v)
@@ -479,8 +475,5 @@ private[sql] object JsonRDD extends Logging {
479475
}
480476

481477
valWriter(rowSchema, row)
482-
gen.close()
483-
writer.toString
484478
}
485-
486479
}

sql/core/src/test/scala/org/apache/spark/sql/json/JsonSuite.scala

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -826,8 +826,8 @@ class JsonSuite extends QueryTest {
826826
df1.registerTempTable("applySchema1")
827827
val df2 = df1.toDataFrame
828828
val result = df2.toJSON.collect()
829-
assert(result(0) == "{\"f1\":1,\"f2\":\"A1\",\"f3\":true,\"f4\":[\"1\",\" A1\",\" true\",\" null\"]}")
830-
assert(result(3) == "{\"f1\":4,\"f2\":\"D4\",\"f3\":true,\"f4\":[\"4\",\" D4\",\" true\",\" 2147483644\"],\"f5\":2147483644}")
829+
assert(result(0) === "{\"f1\":1,\"f2\":\"A1\",\"f3\":true,\"f4\":[\"1\",\" A1\",\" true\",\" null\"]}")
830+
assert(result(3) === "{\"f1\":4,\"f2\":\"D4\",\"f3\":true,\"f4\":[\"4\",\" D4\",\" true\",\" 2147483644\"],\"f5\":2147483644}")
831831

832832
val schema2 = StructType(
833833
StructField("f1", StructType(
@@ -848,8 +848,8 @@ class JsonSuite extends QueryTest {
848848
val df4 = df3.toDataFrame
849849
val result2 = df4.toJSON.collect()
850850

851-
assert(result2(1) == "{\"f1\":{\"f11\":2,\"f12\":false},\"f2\":{\"B2\":null}}")
852-
assert(result2(3) == "{\"f1\":{\"f11\":4,\"f12\":true},\"f2\":{\"D4\":2147483644}}")
851+
assert(result2(1) === "{\"f1\":{\"f11\":2,\"f12\":false},\"f2\":{\"B2\":null}}")
852+
assert(result2(3) === "{\"f1\":{\"f11\":4,\"f12\":true},\"f2\":{\"D4\":2147483644}}")
853853

854854
val jsonDF = jsonRDD(primitiveFieldAndType)
855855
val primTable = jsonRDD(jsonDF.toJSON)

0 commit comments

Comments
 (0)