Skip to content

Commit

Permalink
Merge pull request #331 from gnieh/performances/jsonpatch-diff
Browse files Browse the repository at this point in the history
Improve JsonPatch diff performances
  • Loading branch information
satabin authored Mar 2, 2023
2 parents c2e8c82 + 97aee07 commit 6bdff5b
Show file tree
Hide file tree
Showing 11 changed files with 164 additions and 107 deletions.
22 changes: 11 additions & 11 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -94,11 +94,11 @@ jobs:

- name: Make target directories
if: github.event_name != 'pull_request' && (startsWith(github.ref, 'refs/tags/v') || github.ref == 'refs/heads/main')
run: mkdir -p circe/jvm/target testkit/native/target target testkit/js/target .js/target core/.native/target playJson/jvm/target sprayJson/target core/.js/target circe/js/target core/.jvm/target .jvm/target .native/target circe/native/target playJson/js/target testkit/jvm/target project/target
run: mkdir -p circe/jvm/target testkit/native/target target testkit/js/target .js/target core/.native/target playJson/jvm/target benchmarks/.jvm/target sprayJson/target core/.js/target circe/js/target core/.jvm/target .jvm/target .native/target circe/native/target playJson/js/target testkit/jvm/target project/target

- name: Compress target directories
if: github.event_name != 'pull_request' && (startsWith(github.ref, 'refs/tags/v') || github.ref == 'refs/heads/main')
run: tar cf targets.tar circe/jvm/target testkit/native/target target testkit/js/target .js/target core/.native/target playJson/jvm/target sprayJson/target core/.js/target circe/js/target core/.jvm/target .jvm/target .native/target circe/native/target playJson/js/target testkit/jvm/target project/target
run: tar cf targets.tar circe/jvm/target testkit/native/target target testkit/js/target .js/target core/.native/target playJson/jvm/target benchmarks/.jvm/target sprayJson/target core/.js/target circe/js/target core/.jvm/target .jvm/target .native/target circe/native/target playJson/js/target testkit/jvm/target project/target

- name: Upload target directories
if: github.event_name != 'pull_request' && (startsWith(github.ref, 'refs/tags/v') || github.ref == 'refs/heads/main')
Expand Down Expand Up @@ -181,32 +181,32 @@ jobs:
tar xf targets.tar
rm targets.tar
- name: Download target directories (2.13.9, rootJS)
- name: Download target directories (2.13.10, rootJS)
uses: actions/download-artifact@v3
with:
name: target-${{ matrix.os }}-${{ matrix.java }}-2.13.9-rootJS
name: target-${{ matrix.os }}-${{ matrix.java }}-2.13.10-rootJS

- name: Inflate target directories (2.13.9, rootJS)
- name: Inflate target directories (2.13.10, rootJS)
run: |
tar xf targets.tar
rm targets.tar
- name: Download target directories (2.13.9, rootJVM)
- name: Download target directories (2.13.10, rootJVM)
uses: actions/download-artifact@v3
with:
name: target-${{ matrix.os }}-${{ matrix.java }}-2.13.9-rootJVM
name: target-${{ matrix.os }}-${{ matrix.java }}-2.13.10-rootJVM

- name: Inflate target directories (2.13.9, rootJVM)
- name: Inflate target directories (2.13.10, rootJVM)
run: |
tar xf targets.tar
rm targets.tar
- name: Download target directories (2.13.9, rootNative)
- name: Download target directories (2.13.10, rootNative)
uses: actions/download-artifact@v3
with:
name: target-${{ matrix.os }}-${{ matrix.java }}-2.13.9-rootNative
name: target-${{ matrix.os }}-${{ matrix.java }}-2.13.10-rootNative

- name: Inflate target directories (2.13.9, rootNative)
- name: Inflate target directories (2.13.10, rootNative)
run: |
tar xf targets.tar
rm targets.tar
Expand Down
16 changes: 13 additions & 3 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -20,11 +20,9 @@ syntax: glob
.idea
*.iml

#bsp
.bsp

# building
target
out
build
null
tmp*
Expand All @@ -51,3 +49,15 @@ build.log
#ensime
.ensime*
ensime.sbt

# website
site/content/api
site/content/documentation/
site/output

.metals/
.bloop/
.bsp/
metals.sbt

.vscode/settings.json
2 changes: 1 addition & 1 deletion .scalafmt.conf
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
version = "3.7.2"
version = "3.7.1"
maxColumn = 120
danglingParentheses.preset = false
align.preset = some
Expand Down
58 changes: 58 additions & 0 deletions benchmarks/src/main/scala/diffson/PatienceBenchmarks.scala
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
package diffson

import org.openjdk.jmh.annotations.State
import org.openjdk.jmh.annotations.Scope
import org.openjdk.jmh.annotations.BenchmarkMode
import org.openjdk.jmh.annotations.Mode
import org.openjdk.jmh.annotations.Fork
import org.openjdk.jmh.annotations.Warmup
import org.openjdk.jmh.annotations.Measurement

import diffson.circe._
import diffson.jsonpatch.lcsdiff._
import diffson.lcs._

import io.circe.syntax._
import io.circe.Json
import org.openjdk.jmh.annotations.Benchmark

/** JMH throughput benchmarks for the LCS-based JSON patch `diff`, run with the [[Patience]] LCS implementation
  * on circe `Json` values.
  *
  * Every input document is built once, when the benchmark state is instantiated, so the measured methods only
  * contain the call to `diff`.
  */
@BenchmarkMode(Array(Mode.Throughput))
@State(Scope.Benchmark)
@Fork(value = 1)
@Warmup(iterations = 3, time = 2)
@Measurement(iterations = 5, time = 2)
class PatienceBenchmarks {

  // The type is spelled out on purpose: an implicit member without a declared type is rejected by Scala 3 and
  // makes implicit resolution depend on type inference in Scala 2.
  implicit val lcs: Lcs[Json] = new Patience[Json]

  /** Builds a nested document.
    *
    * The innermost object holds an `"array"` field with one `{"n": n, "other": "common"}` object for each `n` in
    * `List.range(0, 1000, arrayStep)`. That object is then wrapped `depth` times in
    * `{"key<idx>": <previous>, "other": arrayStep}`, with `idx` running from `depth` down to `1`, so `key1` is the
    * outermost field.
    *
    * @param depth
    *   number of wrapping objects around the innermost one
    * @param arrayStep
    *   step between two consecutive `n` values in the innermost array, also stored in the `"other"` field of each
    *   wrapping object
    */
  private def createJson(depth: Int, arrayStep: Int): Json = {
    val innermost: Json =
      Json.obj("array" := List.range(0, 1000, arrayStep).map(n => Json.obj("n" := n, "other" := "common")))

    List
      .range(depth, 0, -1)
      .foldLeft(innermost) { (acc, idx) =>
        Json.obj(s"key$idx" := acc, "other" := arrayStep)
      }
  }

  /** Builds `{"array": [0, step, 2 * step, ...]}` with all the values strictly below `size`.
    *
    * @param size
    *   exclusive upper bound of the generated integers
    * @param step
    *   distance between two consecutive integers
    */
  def array(size: Int, step: Int): Json =
    Json.obj("array" := List.range(0, size, step))

  // The two deep documents differ in the innermost array and in the "other" field of every level.
  val deep1: Json =
    createJson(100, 1)

  val deep2: Json =
    createJson(100, 2)

  // array1 holds every second integer below 1000, array2 holds all of them.
  val array1: Json =
    array(1000, 2)

  val array2: Json =
    array(1000, 1)

  /** Diffs two flat documents whose only field is an integer array.
    *
    * The patch is returned rather than discarded.
    * NOTE(review): JMH is expected to consume returned values so the call is not optimised away -- confirm against
    * the JMH documentation.
    */
  @Benchmark
  def diffArray() =
    diff(array1, array2)

  /** Diffs the two deeply nested documents; the patch is returned for the same reason as in `diffArray`. */
  @Benchmark
  def diffDeep() =
    diff(deep1, deep2)
}
13 changes: 13 additions & 0 deletions build.sbt
Original file line number Diff line number Diff line change
Expand Up @@ -90,3 +90,16 @@ lazy val circe = crossProject(JSPlatform, JVMPlatform, NativePlatform)
)
)
.dependsOn(core, testkit % Test)

// JMH benchmark module: JVM platform only, sources under `benchmarks/`, built on top of the `circe` module.
lazy val benchmarks = crossProject(JVMPlatform)
  .crossType(CrossType.Pure)
  .in(file("benchmarks"))
  .enablePlugins(NoPublishPlugin, JmhPlugin)
  .settings(commonSettings: _*)
  .settings(
    name := "diffson-benchmarks",
    // a single extra library, so it is appended directly instead of through a one-element Seq
    libraryDependencies += "io.circe" %% "circe-literal" % circeVersion
  )
  .dependsOn(circe)
114 changes: 43 additions & 71 deletions core/src/main/scala/diffson/jsonpatch/JsonDiff.scala
Original file line number Diff line number Diff line change
Expand Up @@ -20,73 +20,45 @@ package jsonpatch
import lcs._
import jsonpointer._

import cats.implicits._
import cats.syntax.all._
import cats.data.Chain
import cats.Eval

import scala.annotation.tailrec

class JsonDiff[Json](diffArray: Boolean, rememberOld: Boolean)(implicit J: Jsony[Json], Lcs: Lcs[Json])
extends Diff[Json, JsonPatch[Json]] {
def diff(json1: Json, json2: Json): JsonPatch[Json] =
JsonPatch(diff(json1, json2, Pointer.Root))
JsonPatch(diff(json1, json2, Pointer.Root).value.toList)

private def diff(json1: Json, json2: Json, pointer: Pointer): List[Operation[Json]] =
if (json1 === json2)
// if they are equal, this one is easy...
Nil
else
(json1, json2) match {
case (JsObject(fields1), JsObject(fields2)) => fieldsDiff(fields1.toList, fields2.toList, pointer)
case (JsArray(arr1), JsArray(arr2)) if diffArray => arraysDiff(arr1.toList, arr2.toList, pointer)
case (_, _) => List(Replace(pointer, json2, if (rememberOld) Some(json1) else None))
}
private def diff(json1: Json, json2: Json, pointer: Pointer): Eval[Chain[Operation[Json]]] =
(json1, json2) match {
case (JsObject(fields1), JsObject(fields2)) => fieldsDiff(fields1.toList, fields2, pointer)
case (JsArray(arr1), JsArray(arr2)) if diffArray => arraysDiff(arr1.toList, arr2.toList, pointer)
case _ if json1 === json2 =>
// if they are equal, this one is easy...
Eval.now(Chain.empty)
case _ => Eval.now(Chain.one(Replace(pointer, json2, if (rememberOld) Some(json1) else None)))
}

private def fieldsDiff(fields1: List[(String, Json)],
fields2: List[(String, Json)],
path: Pointer): List[Operation[Json]] = {
// sort fields by name in both objects
val sorted1 = fields1.sortBy(_._1)
val sorted2 = fields2.sortBy(_._1)
@tailrec
def associate(fields1: List[(String, Json)],
fields2: List[(String, Json)],
acc: List[(Option[(String, Json)], Option[(String, Json)])])
: List[(Option[(String, Json)], Option[(String, Json)])] = (fields1, fields2) match {
case (f1 :: t1, f2 :: t2) if f1._1 == f2._1 =>
// same name, associate both
associate(t1, t2, (Some(f1), Some(f2)) :: acc)
case (f1 :: t1, f2 :: _) if f1._1 < f2._1 =>
// the first field is not present in the second object
associate(t1, fields2, (Some(f1), None) :: acc)
case (_ :: _, f2 :: t2) =>
// the second field is not present in the first object
associate(fields1, t2, (None, Some(f2)) :: acc)
case (_, Nil) =>
fields1.map(Some(_) -> None) ::: acc
case (Nil, _) =>
fields2.map(None -> Some(_)) ::: acc
fields2: Map[String, Json],
path: Pointer): Eval[Chain[Operation[Json]]] =
fields1 match {
case (fld, value1) :: fields1 =>
fields2.get(fld) match {
case Some(value2) =>
fieldsDiff(fields1, fields2 - fld, path).flatMap(d => diff(value1, value2, path / fld).map(_ ++ d))
case None =>
// field is not in the second object, delete it
fieldsDiff(fields1, fields2, path).map(
_.prepend(Remove(path / fld, if (rememberOld) Some(value1) else None)))
}
case Nil =>
Eval.now(Chain.fromSeq(fields2.toList).map { case (fld, value) => Add(path / fld, value) })
}
@tailrec
def fields(fs: List[(Option[(String, Json)], Option[(String, Json)])],
acc: List[Operation[Json]]): List[Operation[Json]] = fs match {
case (Some(f1), Some(f2)) :: tl if f1 == f2 =>
// all right, nothing changed
fields(tl, acc)
case (Some(f1), Some(f2)) :: tl =>
// same field name, different values
fields(tl, diff(f1._2, f2._2, path / f1._1) ::: acc)
case (Some(f1), None) :: tl =>
// the field was deleted
fields(tl, Remove[Json](path / f1._1, if (rememberOld) Some(f1._2) else None) :: acc)
case (None, Some(f2)) :: tl =>
// the field was added
fields(tl, Add(path / f2._1, f2._2) :: acc)
case _ =>
acc
}
fields(associate(sorted1, sorted2, Nil), Nil)
}

private def arraysDiff(arr1: List[Json], arr2: List[Json], path: Pointer): List[Operation[Json]] = {
private def arraysDiff(arr1: List[Json], arr2: List[Json], path: Pointer): Eval[Chain[Operation[Json]]] = {
// get the longest common subsequence in the array
val lcs = Lcs.lcs(arr1, arr2)

Expand All @@ -104,27 +76,27 @@ class JsonDiff[Json](diffArray: Boolean, rememberOld: Boolean)(implicit J: Jsony

// add a bunch of values to an array starting at the specified index
@tailrec
def add(arr: List[Json], idx: Int, acc: List[Operation[Json]]): List[Operation[Json]] = arr match {
case v :: tl => add(tl, idx + 1, Add(path / idx, v) :: acc)
case Nil => acc.reverse
def add(arr: List[Json], idx: Int, acc: Chain[Operation[Json]]): Chain[Operation[Json]] = arr match {
case v :: tl => add(tl, idx + 1, acc.append(Add(path / idx, v)))
case Nil => acc
}

// remove a bunch of array elements starting by the last one in the range
def remove(from: Int, until: Int, shift: Int, arr: List[Json]): List[Operation[Json]] =
(for (idx <- until to from by -1)
yield Remove[Json](path / idx, if (rememberOld) Some(arr(idx - shift)) else None)).toList
def remove(from: Int, until: Int, shift: Int, arr: List[Json]): Chain[Operation[Json]] =
Chain.fromSeq(
for (idx <- until to from by -1)
yield Remove[Json](path / idx, if (rememberOld) Some(arr(idx - shift)) else None))

// now iterate over the first array to compute what was added, what was removed and what was modified
@tailrec
def loop(
arr1: List[Json], // the first array
arr2: List[Json], // the second array
idx1: Int, // current index in the first array
shift1: Int, // current index shift in the first array (due to elements being add or removed)
idx2: Int, // current index in the second array
lcs: List[(Int, Int)], // the list of remaining matching indices
acc: List[Operation[Json]] // the already accumulated result
): List[Operation[Json]] = (arr1, arr2) match {
acc: Chain[Operation[Json]] // the already accumulated result
): Eval[Chain[Operation[Json]]] = (arr1, arr2) match {
case (_ :: tl1, _) if isCommon1(idx1, lcs) =>
// all values in arr2 were added until the index of common value
val until = lcs.head._2
Expand All @@ -134,7 +106,7 @@ class JsonDiff[Json](diffArray: Boolean, rememberOld: Boolean)(implicit J: Jsony
shift1 + until - idx2,
until + 1,
lcs.tail,
add(arr2.take(until - idx2), idx1 + shift1, Nil) reverse_::: acc)
acc ++ add(arr2.take(until - idx2), idx1 + shift1, Chain.empty))
case (_, _ :: tl2) if isCommon2(idx2, lcs) =>
// all values in arr1 were removed until the index of common value
val until = lcs.head._1
Expand All @@ -144,18 +116,18 @@ class JsonDiff[Json](diffArray: Boolean, rememberOld: Boolean)(implicit J: Jsony
shift1 - (until - idx1),
idx2 + 1,
lcs.tail,
remove(idx1 + shift1, until - 1 + shift1, idx1 + shift1, arr1) reverse_::: acc)
acc ++ remove(idx1 + shift1, until - 1 + shift1, idx1 + shift1, arr1))
case (v1 :: tl1, v2 :: tl2) =>
// values are different, recursively compute the diff of these values
loop(tl1, tl2, idx1 + 1, shift1, idx2 + 1, lcs, diff(v1, v2, path / (idx1 + shift1)) reverse_::: acc)
diff(v1, v2, path / (idx1 + shift1)).flatMap(d => loop(tl1, tl2, idx1 + 1, shift1, idx2 + 1, lcs, acc ++ d))
case (_, Nil) =>
// all subsequent values in arr1 were removed
remove(idx1 + shift1, idx1 + arr1.size - 1 + shift1, idx1 + shift1, arr1) reverse_::: acc
Eval.now(acc ++ remove(idx1 + shift1, idx1 + arr1.size - 1 + shift1, idx1 + shift1, arr1))
case (Nil, _) =>
// all subsequent values in arr2 were added
arr2.map(Add(path / "-", _)) reverse_::: acc
Eval.now(acc ++ Chain.fromSeq(arr2.map(Add(path / "-", _))))
}

loop(arr1, arr2, 0, 0, 0, lcs, Nil).reverse
loop(arr1, arr2, 0, 0, 0, lcs, Chain.empty)
}
}
4 changes: 2 additions & 2 deletions core/src/main/scala/diffson/jsonpatch/package.scala
Original file line number Diff line number Diff line change
Expand Up @@ -23,10 +23,10 @@ package object jsonpatch {
object lcsdiff {
object remembering {
implicit def JsonDiffDiff[Json: Jsony: Lcs]: Diff[Json, JsonPatch[Json]] =
new JsonDiff[Json](true, true)
new JsonDiff[Json](true, true)(implicitly, implicitly[Lcs[Json]].savedHashes)
}
implicit def JsonDiffDiff[Json: Jsony: Lcs]: Diff[Json, JsonPatch[Json]] =
new JsonDiff[Json](true, false)
new JsonDiff[Json](true, false)(implicitly, implicitly[Lcs[Json]].savedHashes)
}

object simplediff {
Expand Down
11 changes: 5 additions & 6 deletions core/src/main/scala/diffson/lcs/Patience.scala
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,6 @@ import cats.Eq
import cats.implicits._

import scala.annotation.tailrec
import scala.collection.SortedMap
import scala.collection.immutable.TreeMap
import scala.collection.compat._

Expand All @@ -43,17 +42,17 @@ class Patience[T: Eq](withFallback: Boolean = true) extends Lcs[T] {
/** Returns occurrences that appear only once in the list, associated with their index */
private def uniques(l: List[T]): Map[T, Int] = {
@tailrec
def loop(l: List[Occurrence], acc: Map[T, Int]): Map[T, Int] = l match {
case (value, idx) :: tl =>
def loop(l: List[T], idx: Int, acc: Map[T, Int]): Map[T, Int] = l match {
case value :: tl =>
if (acc.contains(value))
// not unique, remove it from the accumulator and go further
loop(tl, acc - value)
loop(tl, idx + 1, acc - value)
else
loop(tl, acc + (value -> idx))
loop(tl, idx + 1, acc.updated(value, idx))
case Nil =>
acc
}
loop(l.zipWithIndex, Map.empty)
loop(l, 0, Map.empty)
}

/** Takes all occurrences from the first sequence and orders them as in the second sequence if it is present */
Expand Down
1 change: 1 addition & 0 deletions project/plugins.sbt
Original file line number Diff line number Diff line change
Expand Up @@ -2,3 +2,4 @@ addSbtPlugin("org.typelevel" % "sbt-typelevel" % "0.4.18")
addSbtPlugin("org.scala-js" % "sbt-scalajs" % "1.13.0")
addSbtPlugin("org.scala-native" % "sbt-scala-native" % "0.4.10")
addSbtPlugin("org.portable-scala" % "sbt-scala-native-crossproject" % "1.2.0")
addSbtPlugin("pl.project13.scala" % "sbt-jmh" % "0.4.4")
Loading

0 comments on commit 6bdff5b

Please sign in to comment.