From 3d474c89054e6c6094b99e57fe652ad6d45c2976 Mon Sep 17 00:00:00 2001 From: Peter Weinberger Date: Wed, 30 Mar 2022 14:48:29 -0400 Subject: [PATCH] internal/lsp/diff: new diff implementation to replace go-diff The new implementation is based on Myers' paper, and is in the package diff/lcs. There is a new option newDiff, that can be set to 'old', 'new', or 'both'. The default is 'both', although that may not be the right choice for a release. See gopls/hooks/diff.go. 'both' runs both the old and new diff algorithm and saves some statistics in a file in os.Tempdir(), When (or if) the new code becomes the default, this logging (and some internal checking) will be removed. The new implementation has internal checking, which currently panics. The code in gopls/hooks/diff.go tries to save an encrypted (for privacy) version of the failing input. The package diff/myers has not been replaced, but it could be. Fixes golang/go#52966 Change-Id: Id38d76ed383c4330d9373580561765b5a2412587 Reviewed-on: https://go-review.googlesource.com/c/tools/+/396855 Reviewed-by: Robert Findley TryBot-Result: Gopher Robot Reviewed-by: Alan Donovan Run-TryBot: Peter Weinberger --- go.mod | 4 +- go.sum | 22 +- gopls/doc/settings.md | 10 + gopls/go.mod | 4 +- gopls/go.sum | 20 +- gopls/internal/hooks/diff.go | 164 +++++++++ gopls/internal/hooks/diff_test.go | 47 ++- gopls/internal/hooks/hooks.go | 10 +- internal/lsp/diff/diff_test.go | 125 +++++++ internal/lsp/diff/lcs/common.go | 184 ++++++++++ internal/lsp/diff/lcs/common_test.go | 140 +++++++ internal/lsp/diff/lcs/doc.go | 156 ++++++++ internal/lsp/diff/lcs/git.sh | 34 ++ internal/lsp/diff/lcs/labels.go | 55 +++ internal/lsp/diff/lcs/old.go | 530 +++++++++++++++++++++++++++ internal/lsp/diff/lcs/old_test.go | 203 ++++++++++ internal/lsp/diff/ndiff.go | 130 +++++++ internal/lsp/source/options.go | 12 +- internal/lsp/tests/tests.go | 1 + 19 files changed, 1804 insertions(+), 47 deletions(-) create mode 100644 internal/lsp/diff/lcs/common.go 
create mode 100644 internal/lsp/diff/lcs/common_test.go create mode 100644 internal/lsp/diff/lcs/doc.go create mode 100644 internal/lsp/diff/lcs/git.sh create mode 100644 internal/lsp/diff/lcs/labels.go create mode 100644 internal/lsp/diff/lcs/old.go create mode 100644 internal/lsp/diff/lcs/old_test.go create mode 100644 internal/lsp/diff/ndiff.go diff --git a/go.mod b/go.mod index 985b9cc120c..f05aba2b64a 100644 --- a/go.mod +++ b/go.mod @@ -1,12 +1,12 @@ module golang.org/x/tools -go 1.17 +go 1.18 require ( github.com/yuin/goldmark v1.4.1 golang.org/x/mod v0.6.0-dev.0.20220419223038-86c51ed26bb4 golang.org/x/net v0.0.0-20211015210444-4f30a5c0130f golang.org/x/sync v0.0.0-20210220032951-036812b2e83c - golang.org/x/sys v0.0.0-20211019181941-9d821ace8654 + golang.org/x/sys v0.0.0-20220520151302-bc2c85ada10a golang.org/x/text v0.3.7 ) diff --git a/go.sum b/go.sum index 85cf00cab79..efeb68a0ec2 100644 --- a/go.sum +++ b/go.sum @@ -1,30 +1,12 @@ github.com/yuin/goldmark v1.4.1 h1:/vn0k+RBvwlxEmP5E7SZMqNxPhfMVFEJiykr15/0XKM= github.com/yuin/goldmark v1.4.1/go.mod h1:mwnBkeHKe2W/ZEtQ+71ViKU8L12m81fl3OWwC1Zlc8k= -golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= -golang.org/x/crypto v0.0.0-20210921155107-089bfa567519/go.mod h1:GvvjBRRGRdwPK5ydBHafDWAxML/pGHZbMvKqRZ5+Abc= golang.org/x/mod v0.6.0-dev.0.20220419223038-86c51ed26bb4 h1:6zppjxzCulZykYSLyVDYbneBfbaBIQPYMevg0bEwv2s= golang.org/x/mod v0.6.0-dev.0.20220419223038-86c51ed26bb4/go.mod h1:jJ57K6gSWd91VN4djpZkiMVwK6gcyfeH4XE8wZrZaV4= -golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= -golang.org/x/net v0.0.0-20210226172049-e18ecbb05110/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg= golang.org/x/net v0.0.0-20211015210444-4f30a5c0130f h1:OfiFi4JbukWwe3lzw+xunroH1mnC1e2Gy5cxNJApiSY= golang.org/x/net v0.0.0-20211015210444-4f30a5c0130f/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y= 
-golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20210220032951-036812b2e83c h1:5KslGYwFpkhGh+Q16bwMP3cOontH8FOep7tGV86Y7SQ= golang.org/x/sync v0.0.0-20210220032951-036812b2e83c/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= -golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= -golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20210423082822-04245dca01da/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20210615035016-665e8c7367d1/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/sys v0.0.0-20211019181941-9d821ace8654 h1:id054HUawV2/6IGm2IV8KZQjqtwAOo2CYlOToYqa0d0= -golang.org/x/sys v0.0.0-20211019181941-9d821ace8654/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= -golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= -golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= -golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= -golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= -golang.org/x/text v0.3.6/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= +golang.org/x/sys v0.0.0-20220520151302-bc2c85ada10a h1:dGzPydgVsqGcTRVwiLJ1jVbufYwmzD3LfVPLKsKg+0k= +golang.org/x/sys v0.0.0-20220520151302-bc2c85ada10a/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/text v0.3.7 h1:olpwvP2KacW1ZWvsR7uQhoyTYvKAupfQrRGBFM352Gk= golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ= -golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= -golang.org/x/tools 
v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= -golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= diff --git a/gopls/doc/settings.md b/gopls/doc/settings.md index 0ed0e19bb02..6d923b71e36 100644 --- a/gopls/doc/settings.md +++ b/gopls/doc/settings.md @@ -452,6 +452,16 @@ Default: `false`. +#### **newDiff** *string* + +newDiff enables the new diff implementation. If this is "both", +for now both diffs will be run and statistics will be generated in +a file in $TMPDIR. This is a risky setting; help in trying it +is appreciated. If it is "old" the old implementation is used, +and if it is "new", just the new implementation is used. + +Default: 'old'. + ## Code Lenses These are the code lenses that `gopls` currently supports. They can be enabled diff --git a/gopls/go.mod b/gopls/go.mod index 020386fca28..3e89620b7f6 100644 --- a/gopls/go.mod +++ b/gopls/go.mod @@ -8,7 +8,7 @@ require ( github.com/jba/templatecheck v0.6.0 github.com/sergi/go-diff v1.1.0 golang.org/x/mod v0.6.0-dev.0.20220419223038-86c51ed26bb4 - golang.org/x/sys v0.0.0-20220209214540-3681064d5158 + golang.org/x/sys v0.0.0-20220520151302-bc2c85ada10a golang.org/x/tools v0.1.11-0.20220523181440-ccb10502d1a5 golang.org/x/vuln v0.0.0-20220718121659-b9a3ad919698 honnef.co/go/tools v0.3.2 @@ -20,7 +20,7 @@ require ( github.com/BurntSushi/toml v1.0.0 // indirect github.com/google/safehtml v0.0.2 // indirect golang.org/x/exp/typeparams v0.0.0-20220218215828-6cf2b201936e // indirect - golang.org/x/sync v0.0.0-20210220032951-036812b2e83c // indirect + golang.org/x/sync v0.0.0-20210220032951-036812b2e83c golang.org/x/text v0.3.7 // indirect golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1 // indirect ) diff --git a/gopls/go.sum b/gopls/go.sum index f263f9e0d00..4ee977a8c89 100644 --- a/gopls/go.sum +++ b/gopls/go.sum @@ -1,21 +1,14 @@ -github.com/BurntSushi/toml v0.3.1/go.mod 
h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU= -github.com/BurntSushi/toml v0.4.1/go.mod h1:CxXYINrC8qIiEnFrOxCa7Jy5BFHlXnUU2pbicEuybxQ= github.com/BurntSushi/toml v1.0.0 h1:dtDWrepsVPfW9H/4y7dDgFc2MBUSeJhlaDtK13CxFlU= github.com/BurntSushi/toml v1.0.0/go.mod h1:CxXYINrC8qIiEnFrOxCa7Jy5BFHlXnUU2pbicEuybxQ= github.com/client9/misspell v0.3.4 h1:ta993UF76GwbvJcIo3Y68y/M3WxlpEHPWIGDkJYwzJI= -github.com/client9/misspell v0.3.4/go.mod h1:qj6jICC3Q7zFZvVWo7KLAzC3yx5G7kyvSDkc90ppPyw= github.com/creack/pty v1.1.9/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ33E= github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/frankban/quicktest v1.14.2 h1:SPb1KFFmM+ybpEjPUhCCkZOM5xlovT5UbrMvWnXyBns= github.com/frankban/quicktest v1.14.2/go.mod h1:mgiwOwqx65TmIk1wJ6Q7wvnVMocbUorkibMOrVTHZps= -github.com/google/go-cmdtest v0.4.0/go.mod h1:apVn/GCasLZUVpAJ6oWAuyP7Ne7CEsQbTnc0plM3m+o= -github.com/google/go-cmp v0.3.1/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU= -github.com/google/go-cmp v0.5.6/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= github.com/google/go-cmp v0.5.7 h1:81/ik6ipDQS2aGcBfIN5dHDB36BwrStyeAQquSYCV4o= github.com/google/go-cmp v0.5.7/go.mod h1:n+brtR0CgQNWTVd5ZUFpTBC8YFBDLK/h/bpaJ8/DtOE= -github.com/google/renameio v0.1.0/go.mod h1:KWCgfxg9yswjAJkECMjeO8J8rahYeXnNhOm40UhjYkI= github.com/google/safehtml v0.0.2 h1:ZOt2VXg4x24bW0m2jtzAOkhoXV0iM8vNKc0paByCZqM= github.com/google/safehtml v0.0.2/go.mod h1:L4KWwDsUJdECRAEpZoBn3O64bQaywRscowZjJAzjHnU= github.com/jba/printsrc v0.2.2 h1:9OHK51UT+/iMAEBlQIIXW04qvKyF3/vvLuwW/hL8tDU= @@ -47,8 +40,6 @@ golang.org/x/crypto v0.0.0-20210921155107-089bfa567519/go.mod h1:GvvjBRRGRdwPK5y golang.org/x/exp/typeparams v0.0.0-20220218215828-6cf2b201936e h1:qyrTQ++p1afMkO4DPEeLGq/3oTsdlvdH4vqZUBWzUKM= 
golang.org/x/exp/typeparams v0.0.0-20220218215828-6cf2b201936e/go.mod h1:AbB0pIl9nAr9wVwH+Z2ZpaocVmF5I4GyWCDIsVjR0bk= golang.org/x/mod v0.5.1/go.mod h1:5OXOZSfqPIIbmVBIIKWRFfZjPR0E5r58TLhUjH0a2Ro= -golang.org/x/mod v0.6.0-dev.0.20211013180041-c96bc1413d57/go.mod h1:3p9vT2HGsQu2K1YbXdKPJLVgG5VJdoTa1poYQBtP1AY= -golang.org/x/mod v0.6.0-dev.0.20220106191415-9b9b3d81d5e3/go.mod h1:3p9vT2HGsQu2K1YbXdKPJLVgG5VJdoTa1poYQBtP1AY= golang.org/x/mod v0.6.0-dev.0.20220419223038-86c51ed26bb4 h1:6zppjxzCulZykYSLyVDYbneBfbaBIQPYMevg0bEwv2s= golang.org/x/mod v0.6.0-dev.0.20220419223038-86c51ed26bb4/go.mod h1:jJ57K6gSWd91VN4djpZkiMVwK6gcyfeH4XE8wZrZaV4= golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= @@ -61,18 +52,15 @@ golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7w golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20210423082822-04245dca01da/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20210615035016-665e8c7367d1/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/sys v0.0.0-20211019181941-9d821ace8654/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/sys v0.0.0-20211213223007-03aa0b5f6827/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/sys v0.0.0-20220209214540-3681064d5158 h1:rm+CHSpPEEW2IsXUib1ThaHIjuBVZjxNgSKmBLFfD4c= golang.org/x/sys v0.0.0-20220209214540-3681064d5158/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.0.0-20220520151302-bc2c85ada10a h1:dGzPydgVsqGcTRVwiLJ1jVbufYwmzD3LfVPLKsKg+0k= +golang.org/x/sys v0.0.0-20220520151302-bc2c85ada10a/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= 
golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= golang.org/x/text v0.3.6/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= golang.org/x/text v0.3.7 h1:olpwvP2KacW1ZWvsR7uQhoyTYvKAupfQrRGBFM352Gk= golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ= -golang.org/x/vuln v0.0.0-20220503210553-a5481fb0c8be h1:jokAF1mfylAi1iTQx7C44B7vyXUcSEMw8eDv0PzNu8s= -golang.org/x/vuln v0.0.0-20220503210553-a5481fb0c8be/go.mod h1:twca1SxmF6/i2wHY/mj1vLIkkHdp+nil/yA32ZOP4kg= golang.org/x/vuln v0.0.0-20220613164644-4eb5ba49563c h1:r5bbIROBQtRRgoutV8Q3sFY58VzHW6jMBYl48ANSyS4= golang.org/x/vuln v0.0.0-20220613164644-4eb5ba49563c/go.mod h1:UZshlUPxXeGUM9I14UOawXQg6yosDE9cr1vKY/DzgWo= golang.org/x/vuln v0.0.0-20220718121659-b9a3ad919698 h1:9lgpkUgjzoIcZYp7/UPFO/0jIlYcokcEjqWm0hj9pzE= @@ -88,14 +76,10 @@ gopkg.in/errgo.v2 v2.1.0/go.mod h1:hNsd1EY+bozCKY1Ytp96fpM3vjJbqLJn88ws8XvfDNI= gopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= gopkg.in/yaml.v2 v2.2.4 h1:/eiJrUcujPVeJ3xlSWaiNi3uSVmDGBK1pDHUHAnao1I= gopkg.in/yaml.v2 v2.2.4/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= -honnef.co/go/tools v0.2.2/go.mod h1:lPVVZ2BS5TfnjLyizF7o7hv7j9/L+8cZY2hLyjP9cGY= -honnef.co/go/tools v0.3.0 h1:2LdYUZ7CIxnYgskbUZfY7FPggmqnh6shBqfWa8Tn3XU= -honnef.co/go/tools v0.3.0/go.mod h1:vlRD9XErLMGT+mDuofSr0mMMquscM/1nQqtRSsh6m70= honnef.co/go/tools v0.3.2 h1:ytYb4rOqyp1TSa2EPvNVwtPQJctSELKaMyLfqNP4+34= honnef.co/go/tools v0.3.2/go.mod h1:jzwdWgg7Jdq75wlfblQxO4neNaFFSvgc1tD5Wv8U0Yw= mvdan.cc/gofumpt v0.3.0 h1:kTojdZo9AcEYbQYhGuLf/zszYthRdhDNDUi2JKTxas4= mvdan.cc/gofumpt v0.3.0/go.mod h1:0+VyGZWleeIj5oostkOex+nDBA0eyavuDnDusAJ8ylo= mvdan.cc/unparam v0.0.0-20211214103731-d0ef000c54e5 h1:Jh3LAeMt1eGpxomyu3jVkmVZWW2MxZ1qIIV2TZ/nRio= -mvdan.cc/unparam v0.0.0-20211214103731-d0ef000c54e5/go.mod h1:b8RRCBm0eeiWR8cfN88xeq2G5SG3VKGO+5UPWi5FSOY= mvdan.cc/xurls/v2 v2.4.0 h1:tzxjVAj+wSBmDcF6zBB7/myTy3gX9xvi8Tyr28AuQgc= 
mvdan.cc/xurls/v2 v2.4.0/go.mod h1:+GEjq9uNjqs8LQfM9nVnM8rff0OQ5Iash5rzX+N1CSg= diff --git a/gopls/internal/hooks/diff.go b/gopls/internal/hooks/diff.go index a307ba77fd6..e0461a152bb 100644 --- a/gopls/internal/hooks/diff.go +++ b/gopls/internal/hooks/diff.go @@ -5,13 +5,177 @@ package hooks import ( + "crypto/rand" + "encoding/json" "fmt" + "io" + "log" + "math/big" + "os" + "path/filepath" + "runtime" + "strings" + "sync" + "time" + "unicode" "github.com/sergi/go-diff/diffmatchpatch" "golang.org/x/tools/internal/lsp/diff" "golang.org/x/tools/internal/span" ) +// structure for saving information about diffs +// while the new code is being rolled out +type diffstat struct { + Before, After int + Oldedits, Newedits int + Oldtime, Newtime time.Duration + Stack string + Msg string `json:",omitempty"` // for errors + Ignored int `json:",omitempty"` // numbr of skipped records with 0 edits +} + +var ( + mu sync.Mutex // serializes writes and protects ignored + difffd io.Writer + ignored int // lots of the diff calls have 0 diffs +) + +var fileonce sync.Once + +func (s *diffstat) save() { + // save log records in a file in os.TempDir(). + // diff is frequently called with identical strings, so + // these are somewhat compressed out + fileonce.Do(func() { + fname := filepath.Join(os.TempDir(), fmt.Sprintf("gopls-diff-%x", os.Getpid())) + fd, err := os.Create(fname) + if err != nil { + // now what? 
+ } + difffd = fd + }) + + mu.Lock() + defer mu.Unlock() + if s.Oldedits == 0 && s.Newedits == 0 { + if ignored < 15 { + // keep track of repeated instances of no diffs + // but only print every 15th + ignored++ + return + } + s.Ignored = ignored + 1 + } else { + s.Ignored = ignored + } + ignored = 0 + // it would be really nice to see why diff was called + _, f, l, ok := runtime.Caller(2) + if ok { + var fname string + fname = filepath.Base(f) // diff is only called from a few places + s.Stack = fmt.Sprintf("%s:%d", fname, l) + } + x, err := json.Marshal(s) + if err != nil { + log.Print(err) // failure to print statistics should not stop gopls + } + fmt.Fprintf(difffd, "%s\n", x) +} + +// save encrypted versions of the broken input and return the file name +// (the saved strings will have the same diff behavior as the user's strings) +func disaster(before, after string) string { + // encrypt before and after for privacy. (randomized monoalphabetic cipher) + // got will contain the substitution cipher + // for the runes in before and after + got := map[rune]rune{} + for _, r := range before { + got[r] = ' ' // value doesn't matter + } + for _, r := range after { + got[r] = ' ' + } + repl := initrepl(len(got)) + i := 0 + for k := range got { // randomized + got[k] = repl[i] + i++ + } + // use got to encrypt before and after + subst := func(r rune) rune { return got[r] } + first := strings.Map(subst, before) + second := strings.Map(subst, after) + + // one failure per session is enough, and more private. + // this saves the last one. + fname := fmt.Sprintf("%s/gopls-failed-%x", os.TempDir(), os.Getpid()) + fd, err := os.Create(fname) + defer fd.Close() + _, err = fd.Write([]byte(fmt.Sprintf("%s\n%s\n", string(first), string(second)))) + if err != nil { + // what do we tell the user? 
+ return "" + } + // ask the user to send us the file, somehow + return fname +} + +func initrepl(n int) []rune { + repl := make([]rune, 0, n) + for r := rune(0); len(repl) < n; r++ { + if unicode.IsLetter(r) || unicode.IsNumber(r) { + repl = append(repl, r) + } + } + // randomize repl + rdr := rand.Reader + lim := big.NewInt(int64(len(repl))) + for i := 1; i < n; i++ { + v, _ := rand.Int(rdr, lim) + k := v.Int64() + repl[i], repl[k] = repl[k], repl[i] + } + return repl +} + +// BothDiffs calls both the new and old diffs, checks that the new diffs +// change before into after, and attempts to preserve some statistics. +func BothDiffs(uri span.URI, before, after string) (edits []diff.TextEdit, err error) { + // The new diff code contains a lot of internal checks that panic when they + // fail. This code catches the panics, or other failures, tries to save + // the failing example (and it would ask the user to send it back to us, and + // changes options.newDiff to 'old', if only we could figure out how.) + stat := diffstat{Before: len(before), After: len(after)} + now := time.Now() + Oldedits, oerr := ComputeEdits(uri, before, after) + if oerr != nil { + stat.Msg += fmt.Sprintf("old:%v", oerr) + } + stat.Oldedits = len(Oldedits) + stat.Oldtime = time.Since(now) + defer func() { + if r := recover(); r != nil { + disaster(before, after) + edits, err = Oldedits, oerr + } + }() + now = time.Now() + Newedits, rerr := diff.NComputeEdits(uri, before, after) + stat.Newedits = len(Newedits) + stat.Newtime = time.Now().Sub(now) + got := diff.ApplyEdits(before, Newedits) + if got != after { + stat.Msg += "FAIL" + disaster(before, after) + stat.save() + return Oldedits, oerr + } + stat.save() + return Newedits, rerr +} + func ComputeEdits(uri span.URI, before, after string) (edits []diff.TextEdit, err error) { // The go-diff library has an unresolved panic (see golang/go#278774). 
// TODO(rstambler): Remove the recover once the issue has been fixed diff --git a/gopls/internal/hooks/diff_test.go b/gopls/internal/hooks/diff_test.go index d979be78dbe..a9e536728b0 100644 --- a/gopls/internal/hooks/diff_test.go +++ b/gopls/internal/hooks/diff_test.go @@ -2,15 +2,56 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. -package hooks_test +package hooks import ( + "fmt" + "io/ioutil" + "os" "testing" + "unicode/utf8" - "golang.org/x/tools/gopls/internal/hooks" "golang.org/x/tools/internal/lsp/diff/difftest" ) func TestDiff(t *testing.T) { - difftest.DiffTest(t, hooks.ComputeEdits) + difftest.DiffTest(t, ComputeEdits) +} + +func TestRepl(t *testing.T) { + t.Skip("just for checking repl by looking at it") + repl := initrepl(800) + t.Errorf("%q", string(repl)) + t.Errorf("%d", len(repl)) +} + +func TestDisaster(t *testing.T) { + a := "This is a string,(\u0995) just for basic functionality" + b := "Ths is another string, (\u0996) to see if disaster will store stuff correctly" + fname := disaster(a, b) + buf, err := ioutil.ReadFile(fname) + if err != nil { + t.Errorf("error %v reading %s", err, fname) + } + var x, y string + n, err := fmt.Sscanf(string(buf), "%s\n%s\n", &x, &y) + if n != 2 { + t.Errorf("got %d, expected 2", n) + t.Logf("read %q", string(buf)) + } + if a == x || b == y { + t.Error("failed to encrypt") + } + err = os.Remove(fname) + if err != nil { + t.Errorf("%v removing %s", err, fname) + } + alen, blen := utf8.RuneCount([]byte(a)), utf8.RuneCount([]byte(b)) + xlen, ylen := utf8.RuneCount([]byte(x)), utf8.RuneCount([]byte(y)) + if alen != xlen { + t.Errorf("a; got %d, expected %d", xlen, alen) + } + if blen != ylen { + t.Errorf("b: got %d expected %d", ylen, blen) + } } diff --git a/gopls/internal/hooks/hooks.go b/gopls/internal/hooks/hooks.go index 023aefeab98..b55917e0737 100644 --- a/gopls/internal/hooks/hooks.go +++ b/gopls/internal/hooks/hooks.go @@ -11,6 +11,7 @@ import ( 
"context" "golang.org/x/tools/gopls/internal/vulncheck" + "golang.org/x/tools/internal/lsp/diff" "golang.org/x/tools/internal/lsp/source" "mvdan.cc/gofumpt/format" "mvdan.cc/xurls/v2" @@ -19,7 +20,14 @@ import ( func Options(options *source.Options) { options.LicensesText = licensesText if options.GoDiff { - options.ComputeEdits = ComputeEdits + switch options.NewDiff { + case "old": + options.ComputeEdits = ComputeEdits + case "new": + options.ComputeEdits = diff.NComputeEdits + default: + options.ComputeEdits = BothDiffs + } } options.URLRegexp = xurls.Relaxed() options.GofumptFormat = func(ctx context.Context, langVersion, modulePath string, src []byte) ([]byte, error) { diff --git a/internal/lsp/diff/diff_test.go b/internal/lsp/diff/diff_test.go index dd9414e5d7a..a3699498914 100644 --- a/internal/lsp/diff/diff_test.go +++ b/internal/lsp/diff/diff_test.go @@ -6,6 +6,8 @@ package diff_test import ( "fmt" + "math/rand" + "strings" "testing" "golang.org/x/tools/internal/lsp/diff" @@ -29,6 +31,71 @@ func TestApplyEdits(t *testing.T) { } } +func TestNEdits(t *testing.T) { + for i, tc := range difftest.TestCases { + sp := fmt.Sprintf("file://%s.%d", tc.Name, i) + edits, err := diff.NComputeEdits(span.URI(sp), tc.In, tc.Out) + if err != nil { + t.Fatal(err) + } + got := diff.ApplyEdits(tc.In, edits) + if got != tc.Out { + t.Fatalf("%s: got %q wanted %q", tc.Name, got, tc.Out) + } + if len(edits) < len(tc.Edits) { // should find subline edits + t.Errorf("got %v, expected %v for %#v", edits, tc.Edits, tc) + } + } +} + +func TestNRandom(t *testing.T) { + rand.Seed(1) + for i := 0; i < 1000; i++ { + fname := fmt.Sprintf("file://%x", i) + a := randstr("abω", 16) + b := randstr("abωc", 16) + edits, err := diff.NComputeEdits(span.URI(fname), a, b) + if err != nil { + t.Fatalf("%q,%q %v", a, b, err) + } + got := diff.ApplyEdits(a, edits) + if got != b { + t.Fatalf("%d: got %q, wanted %q, starting with %q", i, got, b, a) + } + } +} + +func TestNLinesRandom(t *testing.T) { + 
rand.Seed(2) + for i := 0; i < 1000; i++ { + fname := fmt.Sprintf("file://%x", i) + x := randlines("abω", 4) // avg line length is 6, want a change every 3rd line or so + v := []rune(x) + for i := 0; i < len(v); i++ { + if rand.Float64() < .05 { + v[i] = 'N' + } + } + y := string(v) + // occasionally remove the trailing \n + if rand.Float64() < .1 { + x = x[:len(x)-1] + } + if rand.Float64() < .1 { + y = y[:len(y)-1] + } + a, b := strings.SplitAfter(x, "\n"), strings.SplitAfter(y, "\n") + edits, err := diff.NComputeLineEdits(span.URI(fname), a, b) + if err != nil { + t.Fatalf("%q,%q %v", a, b, err) + } + got := diff.ApplyEdits(x, edits) + if got != y { + t.Fatalf("%d: got\n%q, wanted\n%q, starting with %q", i, got, y, a) + } + } +} + func TestLineEdits(t *testing.T) { for _, tc := range difftest.TestCases { t.Run(tc.Name, func(t *testing.T) { @@ -63,6 +130,41 @@ func TestUnified(t *testing.T) { } } +func TestRegressionOld001(t *testing.T) { + a := "// Copyright 2019 The Go Authors. All rights reserved.\n// Use of this source code is governed by a BSD-style\n// license that can be found in the LICENSE file.\n\npackage diff_test\n\nimport (\n\t\"fmt\"\n\t\"math/rand\"\n\t\"strings\"\n\t\"testing\"\n\n\t\"golang.org/x/tools/internal/lsp/diff\"\n\t\"golang.org/x/tools/internal/lsp/diff/difftest\"\n\t\"golang.org/x/tools/internal/span\"\n)\n" + + b := "// Copyright 2019 The Go Authors. 
All rights reserved.\n// Use of this source code is governed by a BSD-style\n// license that can be found in the LICENSE file.\n\npackage diff_test\n\nimport (\n\t\"fmt\"\n\t\"math/rand\"\n\t\"strings\"\n\t\"testing\"\n\n\t\"github.com/google/safehtml/template\"\n\t\"golang.org/x/tools/internal/lsp/diff\"\n\t\"golang.org/x/tools/internal/lsp/diff/difftest\"\n\t\"golang.org/x/tools/internal/span\"\n)\n" + diffs, err := diff.NComputeEdits(span.URI("file://one"), a, b) + if err != nil { + t.Error(err) + } + got := diff.ApplyEdits(a, diffs) + if got != b { + i := 0 + for ; i < len(a) && i < len(b) && got[i] == b[i]; i++ { + } + t.Errorf("oops %vd\n%q\n%q", diffs, got, b) + t.Errorf("\n%q\n%q", got[i:], b[i:]) + } +} + +func TestRegressionOld002(t *testing.T) { + a := "n\"\n)\n" + b := "n\"\n\t\"golang.org/x//nnal/stack\"\n)\n" + diffs, err := diff.NComputeEdits(span.URI("file://two"), a, b) + if err != nil { + t.Error(err) + } + got := diff.ApplyEdits(a, diffs) + if got != b { + i := 0 + for ; i < len(a) && i < len(b) && got[i] == b[i]; i++ { + } + t.Errorf("oops %vd\n%q\n%q", diffs, got, b) + t.Errorf("\n%q\n%q", got[i:], b[i:]) + } +} + func diffEdits(got, want []diff.TextEdit) bool { if len(got) != len(want) { return true @@ -78,3 +180,26 @@ func diffEdits(got, want []diff.TextEdit) bool { } return false } + +// return a random string of length n made of characters from s +func randstr(s string, n int) string { + src := []rune(s) + x := make([]rune, n) + for i := 0; i < n; i++ { + x[i] = src[rand.Intn(len(src))] + } + return string(x) +} + +// return some random lines, all ending with \n +func randlines(s string, n int) string { + src := []rune(s) + var b strings.Builder + for i := 0; i < n; i++ { + for j := 0; j < 4+rand.Intn(4); j++ { + b.WriteRune(src[rand.Intn(len(src))]) + } + b.WriteByte('\n') + } + return b.String() +} diff --git a/internal/lsp/diff/lcs/common.go b/internal/lsp/diff/lcs/common.go new file mode 100644 index 00000000000..e5d08014760 --- 
/dev/null +++ b/internal/lsp/diff/lcs/common.go @@ -0,0 +1,184 @@ +// Copyright 2022 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package lcs + +import ( + "log" + "sort" +) + +// lcs is a longest common sequence +type lcs []diag + +// A diag is a piece of the edit graph where A[X+i] == B[Y+i], for 0<=i l[j].Len + }) + return l +} + +// validate that the elements of the lcs do not overlap +// (can only happen when the two-sided algorithm ends early) +// expects the lcs to be sorted +func (l lcs) valid() bool { + for i := 1; i < len(l); i++ { + if l[i-1].X+l[i-1].Len > l[i].X { + return false + } + if l[i-1].Y+l[i-1].Len > l[i].Y { + return false + } + } + return true +} + +// repair overlapping lcs +// only called if two-sided stops early +func (l lcs) fix() lcs { + // from the set of diagonals in l, find a maximal non-conflicting set + // this problem may be NP-complete, but we use a greedy heuristic, + // which is quadratic, but with a better data structure, could be D log D. 
+ // independent is not enough: {0,3,1} and {3,0,2} can't both occur in an lcs + // which has to have monotone x and y + if len(l) == 0 { + return nil + } + sort.Slice(l, func(i, j int) bool { return l[i].Len > l[j].Len }) + tmp := make(lcs, 0, len(l)) + tmp = append(tmp, l[0]) + for i := 1; i < len(l); i++ { + var dir direction + nxt := l[i] + for _, in := range tmp { + if dir, nxt = overlap(in, nxt); dir == empty || dir == bad { + break + } + } + if nxt.Len > 0 && dir != bad { + tmp = append(tmp, nxt) + } + } + tmp.sort() + if false && !tmp.valid() { // debug checking + log.Fatalf("here %d", len(tmp)) + } + return tmp +} + +type direction int + +const ( + empty direction = iota // diag is empty (so not in lcs) + leftdown // proposed diag is acceptably to the left and below + rightup // proposed diag is acceptably to the right and above + bad // proposed diag is inconsistent with the lcs so far +) + +// overlap trims the proposed diag prop so it doesn't overlap with +// the existing diag that has already been added to the lcs. 
+func overlap(exist, prop diag) (direction, diag) { + if prop.X <= exist.X && exist.X < prop.X+prop.Len { + // remove the end of prop where it overlaps with the X end of exist + delta := prop.X + prop.Len - exist.X + prop.Len -= delta + if prop.Len <= 0 { + return empty, prop + } + } + if exist.X <= prop.X && prop.X < exist.X+exist.Len { + // remove the beginning of prop where it overlaps with exist + delta := exist.X + exist.Len - prop.X + prop.Len -= delta + if prop.Len <= 0 { + return empty, prop + } + prop.X += delta + prop.Y += delta + } + if prop.Y <= exist.Y && exist.Y < prop.Y+prop.Len { + // remove the end of prop that overlaps (in Y) with exist + delta := prop.Y + prop.Len - exist.Y + prop.Len -= delta + if prop.Len <= 0 { + return empty, prop + } + } + if exist.Y <= prop.Y && prop.Y < exist.Y+exist.Len { + // remove the beginning of prop that overlaps with exist + delta := exist.Y + exist.Len - prop.Y + prop.Len -= delta + if prop.Len <= 0 { + return empty, prop + } + prop.X += delta // no test reaches this code + prop.Y += delta + } + if prop.X+prop.Len <= exist.X && prop.Y+prop.Len <= exist.Y { + return leftdown, prop + } + if exist.X+exist.Len <= prop.X && exist.Y+exist.Len <= prop.Y { + return rightup, prop + } + // prop can't be in an lcs that contains exist + return bad, prop +} + +// manipulating Diag and lcs + +// prependlcs prepends a diagonal (x,y)-(x+1,y+1) segment either to an empty lcs +// or to its first Diag. prependlcs is only called while extending diagonals +// in the backward direction. +func prependlcs(lcs lcs, x, y int) lcs { + if len(lcs) > 0 { + d := &lcs[0] + if int(d.X) == x+1 && int(d.Y) == y+1 { + // extend the diagonal down and to the left + d.X, d.Y = int(x), int(y) + d.Len++ + return lcs + } + } + + r := diag{X: int(x), Y: int(y), Len: 1} + lcs = append([]diag{r}, lcs...) + return lcs +} + +// appendlcs appends a diagonal, or extends the existing one, +// by adding the edge (x,y)-(x+1,y+1). 
appendlcs is only called +// while extending diagonals in the forward direction. +func appendlcs(lcs lcs, x, y int) lcs { + if len(lcs) > 0 { + last := &lcs[len(lcs)-1] + // Expand last element if adjoining. + if last.X+last.Len == x && last.Y+last.Len == y { + last.Len++ + return lcs + } + } + + return append(lcs, diag{X: x, Y: y, Len: 1}) +} + +// enforce constraint on d, k +func ok(d, k int) bool { + return d >= 0 && -d <= k && k <= d +} + +type Diff struct { + Start, End int // offsets in A + Text string // replacement text +} diff --git a/internal/lsp/diff/lcs/common_test.go b/internal/lsp/diff/lcs/common_test.go new file mode 100644 index 00000000000..4aa36abc2e8 --- /dev/null +++ b/internal/lsp/diff/lcs/common_test.go @@ -0,0 +1,140 @@ +// Copyright 2022 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package lcs + +import ( + "log" + "math/rand" + "strings" + "testing" +) + +type Btest struct { + a, b string + lcs []string +} + +var Btests = []Btest{ + {"aaabab", "abaab", []string{"abab", "aaab"}}, + {"aabbba", "baaba", []string{"aaba"}}, + {"cabbx", "cbabx", []string{"cabx", "cbbx"}}, + {"c", "cb", []string{"c"}}, + {"aaba", "bbb", []string{"b"}}, + {"bbaabb", "b", []string{"b"}}, + {"baaabb", "bbaba", []string{"bbb", "baa", "bab"}}, + {"baaabb", "abbab", []string{"abb", "bab", "aab"}}, + {"baaba", "aaabba", []string{"aaba"}}, + {"ca", "cba", []string{"ca"}}, + {"ccbcbc", "abba", []string{"bb"}}, + {"ccbcbc", "aabba", []string{"bb"}}, + {"ccb", "cba", []string{"cb"}}, + {"caef", "axe", []string{"ae"}}, + {"bbaabb", "baabb", []string{"baabb"}}, + // Example from Myers: + {"abcabba", "cbabac", []string{"caba", "baba", "cbba"}}, + {"3456aaa", "aaa", []string{"aaa"}}, + {"aaa", "aaa123", []string{"aaa"}}, + {"aabaa", "aacaa", []string{"aaaa"}}, + {"1a", "a", []string{"a"}}, + {"abab", "bb", []string{"bb"}}, + {"123", "ab", []string{""}}, + {"a", "b", 
[]string{""}}, + {"abc", "123", []string{""}}, + {"aa", "aa", []string{"aa"}}, + {"abcde", "12345", []string{""}}, + {"aaa3456", "aaa", []string{"aaa"}}, + {"abcde", "12345a", []string{"a"}}, + {"ab", "123", []string{""}}, + {"1a2", "a", []string{"a"}}, + // for two-sided + {"babaab", "cccaba", []string{"aba"}}, + {"aabbab", "cbcabc", []string{"bab"}}, + {"abaabb", "bcacab", []string{"baab"}}, + {"abaabb", "abaaaa", []string{"abaa"}}, + {"bababb", "baaabb", []string{"baabb"}}, + {"abbbaa", "cabacc", []string{"aba"}}, + {"aabbaa", "aacaba", []string{"aaaa", "aaba"}}, +} + +func init() { + log.SetFlags(log.Lshortfile) +} + +func check(t *testing.T, str string, lcs lcs, want []string) { + t.Helper() + if !lcs.valid() { + t.Errorf("bad lcs %v", lcs) + } + var got strings.Builder + for _, dd := range lcs { + got.WriteString(str[dd.X : dd.X+dd.Len]) + } + ans := got.String() + for _, w := range want { + if ans == w { + return + } + } + t.Fatalf("str=%q lcs=%v want=%q got=%q", str, lcs, want, ans) +} + +func checkDiffs(t *testing.T, before string, diffs []Diff, after string) { + t.Helper() + var ans strings.Builder + sofar := 0 // index of position in before + for _, d := range diffs { + if sofar < d.Start { + ans.WriteString(before[sofar:d.Start]) + } + ans.WriteString(d.Text) + sofar = d.End + } + ans.WriteString(before[sofar:]) + if ans.String() != after { + t.Fatalf("diff %v took %q to %q, not to %q", diffs, before, ans.String(), after) + } +} + +func lcslen(l lcs) int { + ans := 0 + for _, d := range l { + ans += int(d.Len) + } + return ans +} + +// return a random string of length n made of characters from s +func randstr(s string, n int) string { + src := []rune(s) + x := make([]rune, n) + for i := 0; i < n; i++ { + x[i] = src[rand.Intn(len(src))] + } + return string(x) +} + +func TestLcsFix(t *testing.T) { + tests := []struct{ before, after lcs }{ + {lcs{diag{0, 0, 3}, diag{2, 2, 5}, diag{3, 4, 5}, diag{8, 9, 4}}, lcs{diag{0, 0, 2}, diag{2, 2, 1}, diag{3, 4, 5}, 
diag{8, 9, 4}}}, + {lcs{diag{1, 1, 6}, diag{6, 12, 3}}, lcs{diag{1, 1, 5}, diag{6, 12, 3}}}, + {lcs{diag{0, 0, 4}, diag{3, 5, 4}}, lcs{diag{0, 0, 3}, diag{3, 5, 4}}}, + {lcs{diag{0, 20, 1}, diag{0, 0, 3}, diag{1, 20, 4}}, lcs{diag{0, 0, 3}, diag{3, 22, 2}}}, + {lcs{diag{0, 0, 4}, diag{1, 1, 2}}, lcs{diag{0, 0, 4}}}, + {lcs{diag{0, 0, 4}}, lcs{diag{0, 0, 4}}}, + {lcs{}, lcs{}}, + {lcs{diag{0, 0, 4}, diag{1, 1, 6}, diag{3, 3, 2}}, lcs{diag{0, 0, 1}, diag{1, 1, 6}}}, + } + for n, x := range tests { + got := x.before.fix() + if len(got) != len(x.after) { + t.Errorf("got %v, expected %v, for %v", got, x.after, x.before) + } + olen := lcslen(x.after) + glen := lcslen(got) + if olen != glen { + t.Errorf("%d: lens(%d,%d) differ, %v, %v, %v", n, glen, olen, got, x.after, x.before) + } + } +} diff --git a/internal/lsp/diff/lcs/doc.go b/internal/lsp/diff/lcs/doc.go new file mode 100644 index 00000000000..dc779f38a01 --- /dev/null +++ b/internal/lsp/diff/lcs/doc.go @@ -0,0 +1,156 @@ +// Copyright 2022 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Package lcs contains code to find longest-common-subsequences +// (and diffs). +package lcs + +/* +Compute longest-common-subsequences of two slices A, B using +algorithms from Myers' paper. A longest-common-subsequence +(LCS from now on) of A and B is a maximal set of lexically increasing +pairs of subscripts (x,y) with A[x]==B[y]. There may be many LCS, but +they all have the same length. An LCS determines a sequence of edits +that changes A into B. + +The key concept is the edit graph of A and B. +If A has length N and B has length M, then the edit graph has +vertices v[i][j] for 0 <= i <= N, 0 <= j <= M. There is a +horizontal edge from v[i][j] to v[i+1][j] whenever both are in +the graph, and a vertical edge from v[i][j] to v[i][j+1] similarly. +When A[i] == B[j] there is a diagonal edge from v[i][j] to v[i+1][j+1].
+ +A path in the graph between (0,0) and (N,M) determines a sequence +of edits converting A into B: each horizontal edge corresponds to removing +an element of A, and each vertical edge corresponds to inserting an +element of B. + +A vertex (x,y) is on (forward) diagonal k if x-y=k. A path in the graph +is of length D if it has D non-diagonal edges. The algorithms generate +forward paths (in which at least one of x,y increases at each edge), +or backward paths (in which at least one of x,y decreases at each edge), +or a combination. (Note that the orientation is the traditional mathematical one, +with the origin in the lower-left corner.) + +Here is the edit graph for A:"aabbaa", B:"aacaba". (I know the diagonals look weird.) + ⊙ ------- ⊙ ------- ⊙ ------- ⊙ ------- ⊙ ------- ⊙ ------- ⊙ + a | ___/‾‾‾ | ___/‾‾‾ | | | ___/‾‾‾ | ___/‾‾‾ | + ⊙ ------- ⊙ ------- ⊙ ------- ⊙ ------- ⊙ ------- ⊙ ------- ⊙ + b | | | ___/‾‾‾ | ___/‾‾‾ | | | + ⊙ ------- ⊙ ------- ⊙ ------- ⊙ ------- ⊙ ------- ⊙ ------- ⊙ + a | ___/‾‾‾ | ___/‾‾‾ | | | ___/‾‾‾ | ___/‾‾‾ | + ⊙ ------- ⊙ ------- ⊙ ------- ⊙ ------- ⊙ ------- ⊙ ------- ⊙ + c | | | | | | | + ⊙ ------- ⊙ ------- ⊙ ------- ⊙ ------- ⊙ ------- ⊙ ------- ⊙ + a | ___/‾‾‾ | ___/‾‾‾ | | | ___/‾‾‾ | ___/‾‾‾ | + ⊙ ------- ⊙ ------- ⊙ ------- ⊙ ------- ⊙ ------- ⊙ ------- ⊙ + a | ___/‾‾‾ | ___/‾‾‾ | | | ___/‾‾‾ | ___/‾‾‾ | + ⊙ ------- ⊙ ------- ⊙ ------- ⊙ ------- ⊙ ------- ⊙ ------- ⊙ + a a b b a a + + +The algorithm labels a vertex (x,y) with D,k if it is on diagonal k and at +the end of a maximal path of length D. (Because x-y=k it suffices to remember +only the x coordinate of the vertex.) + +The forward algorithm: Find the longest diagonal starting at (0,0) and +label its end with D=0,k=0. From that vertex take a vertical step and +then follow the longest diagonal (up and to the right), and label that vertex +with D=1,k=-1.
From the D=0,k=0 point take a horizontal step and then follow +the longest diagonal (up and to the right) and label that vertex +D=1,k=1. In the same way, having labelled all the D vertices, +from a vertex labelled D,k find two vertices +tentatively labelled D+1,k-1 and D+1,k+1. There may be two on the same +diagonal, in which case take the one with the larger x. + +Eventually the path gets to (N,M), and the diagonals on it are the LCS. + +Here is the edit graph with the ends of D-paths labelled. (So, for instance, +0/2,2 indicates that x=2,y=2 is labelled with 0, as it should be, since the first +step is to go up the longest diagonal from (0,0).) +A:"aabbaa", B:"aacaba" + ⊙ ------- ⊙ ------- ⊙ -------(3/3,6)------- ⊙ -------(3/5,6)-------(4/6,6) + a | ___/‾‾‾ | ___/‾‾‾ | | | ___/‾‾‾ | ___/‾‾‾ | + ⊙ ------- ⊙ ------- ⊙ -------(2/3,5)------- ⊙ ------- ⊙ ------- ⊙ + b | | | ___/‾‾‾ | ___/‾‾‾ | | | + ⊙ ------- ⊙ ------- ⊙ ------- ⊙ ------- ⊙ -------(3/5,4)------- ⊙ + a | ___/‾‾‾ | ___/‾‾‾ | | | ___/‾‾‾ | ___/‾‾‾ | + ⊙ ------- ⊙ -------(1/2,3)-------(2/3,3)------- ⊙ ------- ⊙ ------- ⊙ + c | | | | | | | + ⊙ ------- ⊙ -------(0/2,2)-------(1/3,2)-------(2/4,2)-------(3/5,2)-------(4/6,2) + a | ___/‾‾‾ | ___/‾‾‾ | | | ___/‾‾‾ | ___/‾‾‾ | + ⊙ ------- ⊙ ------- ⊙ ------- ⊙ ------- ⊙ ------- ⊙ ------- ⊙ + a | ___/‾‾‾ | ___/‾‾‾ | | | ___/‾‾‾ | ___/‾‾‾ | + ⊙ ------- ⊙ ------- ⊙ ------- ⊙ ------- ⊙ ------- ⊙ ------- ⊙ + a a b b a a + +The 4-path is reconstructed starting at (4/6,6), horizontal to (3/5,6), diagonal to (3,4), vertical +to (2/3,3), horizontal to (1/2,3), vertical to (0/2,2), and diagonal to (0,0). As expected, +there are 4 non-diagonal steps, and the diagonals form an LCS.
+ +There is a symmetric backward algorithm, which gives (backwards labels are prefixed with a colon): +A:"aabbaa", B:"aacaba" + ⊙ -------- ⊙ -------- ⊙ -------- ⊙ -------- ⊙ -------- ⊙ -------- ⊙ + a | ____/‾‾‾ | ____/‾‾‾ | | | ____/‾‾‾ | ____/‾‾‾ | + ⊙ -------- ⊙ -------- ⊙ -------- ⊙ -------- ⊙ --------(:0/5,5)-------- ⊙ + b | | | ____/‾‾‾ | ____/‾‾‾ | | | + ⊙ -------- ⊙ -------- ⊙ --------(:1/3,4)-------- ⊙ -------- ⊙ -------- ⊙ + a | ____/‾‾‾ | ____/‾‾‾ | | | ____/‾‾‾ | ____/‾‾‾ | + (:3/0,3)--------(:2/1,3)-------- ⊙ --------(:2/3,3)--------(:1/4,3)-------- ⊙ -------- ⊙ + c | | | | | | | + ⊙ -------- ⊙ -------- ⊙ --------(:3/3,2)--------(:2/4,2)-------- ⊙ -------- ⊙ + a | ____/‾‾‾ | ____/‾‾‾ | | | ____/‾‾‾ | ____/‾‾‾ | + (:3/0,1)-------- ⊙ -------- ⊙ -------- ⊙ --------(:3/4,1)-------- ⊙ -------- ⊙ + a | ____/‾‾‾ | ____/‾‾‾ | | | ____/‾‾‾ | ____/‾‾‾ | + (:4/0,0)-------- ⊙ -------- ⊙ -------- ⊙ --------(:4/4,0)-------- ⊙ -------- ⊙ + a a b b a a + +Neither of these is ideal for use in an editor, where it is undesirable to send very long diffs to the +front end. It's tricky to decide exactly what 'very long diffs' means, as "replace A by B" is very short. +We want to control how big D can be, by stopping when it gets too large. The forward algorithm then +privileges common prefixes, and the backward algorithm privileges common suffixes. Either is an undesirable +asymmetry. + +Fortunately there is a two-sided algorithm, implied by results in Myers' paper. Here's what the labels in +the edit graph look like. 
+A:"aabbaa", B:"aacaba" + ⊙ --------- ⊙ --------- ⊙ --------- ⊙ --------- ⊙ --------- ⊙ --------- ⊙ + a | ____/‾‾‾‾ | ____/‾‾‾‾ | | | ____/‾‾‾‾ | ____/‾‾‾‾ | + ⊙ --------- ⊙ --------- ⊙ --------- (2/3,5) --------- ⊙ --------- (:0/5,5)--------- ⊙ + b | | | ____/‾‾‾‾ | ____/‾‾‾‾ | | | + ⊙ --------- ⊙ --------- ⊙ --------- (:1/3,4)--------- ⊙ --------- ⊙ --------- ⊙ + a | ____/‾‾‾‾ | ____/‾‾‾‾ | | | ____/‾‾‾‾ | ____/‾‾‾‾ | + ⊙ --------- (:2/1,3)--------- (1/2,3) ---------(2:2/3,3)--------- (:1/4,3)--------- ⊙ --------- ⊙ + c | | | | | | | + ⊙ --------- ⊙ --------- (0/2,2) --------- (1/3,2) ---------(2:2/4,2)--------- ⊙ --------- ⊙ + a | ____/‾‾‾‾ | ____/‾‾‾‾ | | | ____/‾‾‾‾ | ____/‾‾‾‾ | + ⊙ --------- ⊙ --------- ⊙ --------- ⊙ --------- ⊙ --------- ⊙ --------- ⊙ + a | ____/‾‾‾‾ | ____/‾‾‾‾ | | | ____/‾‾‾‾ | ____/‾‾‾‾ | + ⊙ --------- ⊙ --------- ⊙ --------- ⊙ --------- ⊙ --------- ⊙ --------- ⊙ + a a b b a a + +The algorithm stopped when it saw the backwards 2-path ending at (1,3) and the forwards 2-path ending at (3,5). The criterion +is a backwards path ending at (u,v) and a forward path ending at (x,y), where u <= x and the two points are on the same +diagonal. (Here the edit graph has a diagonal, but the criterion is x-y=u-v.) Myers proves there is a forward +2-path from (0,0) to (1,3), and that together with the backwards 2-path ending at (1,3) gives the expected 4-path. +Unfortunately the forward path has to be constructed by another run of the forward algorithm; it can't be found from the +computed labels. That is the worst case. Had the code noticed (x,y)=(u,v)=(3,3) the whole path could be reconstructed +from the edit graph. The implementation looks for a number of special cases to try to avoid computing an extra forward path. + +If the two-sided algorithm has to stop early (because D has become too large) it will have found a forward LCS and a +backwards LCS.
Ideally these go with disjoint prefixes and suffixes of A and B, but disjointness may fail and the two +computed LCS may conflict. (An easy example is where A is a suffix of B, and shares a short prefix. The backwards LCS +is all of A, and the forward LCS is a prefix of A.) The algorithm combines the two +to form a best-effort LCS. In the worst case the forward partial LCS may have to +be recomputed. +*/ + +/* Eugene Myers' paper is titled +"An O(ND) Difference Algorithm and Its Variations" +and can be found at +http://www.xmailserver.org/diff2.pdf + +(There is a generic implementation of the algorithm in the repository with git hash +b9ad7e4ade3a686d608e44475390ad428e60e7fc) +*/ diff --git a/internal/lsp/diff/lcs/git.sh b/internal/lsp/diff/lcs/git.sh new file mode 100644 index 00000000000..caa4c424198 --- /dev/null +++ b/internal/lsp/diff/lcs/git.sh @@ -0,0 +1,34 @@ + +#!/bin/bash +# +# Copyright 2022 The Go Authors. All rights reserved. +# Use of this source code is governed by a BSD-style +# license that can be found in the LICENSE file. +# +# Creates a zip file containing all numbered versions +# of the commit history of a large source file, for use +# as input data for the tests of the diff algorithm. +# +# Run script from root of the x/tools repo. + +set -eu + +# WARNING: This script will install the latest version of $file +# The largest real source file in the x/tools repo.
+# file=internal/lsp/source/completion/completion.go +# file=internal/lsp/source/diagnostics.go +file=internal/lsp/protocol/tsprotocol.go + +tmp=$(mktemp -d) +git log $file | + awk '/^commit / {print $2}' | + nl -ba -nrz | + while read n hash; do + git checkout --quiet $hash $file + cp -f $file $tmp/$n + done +(cd $tmp && zip -q - *) > testdata.zip +rm -fr $tmp +git restore --staged $file +git restore $file +echo "Created testdata.zip" diff --git a/internal/lsp/diff/lcs/labels.go b/internal/lsp/diff/lcs/labels.go new file mode 100644 index 00000000000..0689f1ed700 --- /dev/null +++ b/internal/lsp/diff/lcs/labels.go @@ -0,0 +1,55 @@ +// Copyright 2022 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package lcs + +import ( + "fmt" +) + +// For each D, vec[D] has length D+1, +// and the label for (D, k) is stored in vec[D][(D+k)/2]. +type label struct { + vec [][]int +} + +// Temporary checking DO NOT COMMIT true TO PRODUCTION CODE +const debug = false + +// debugging. check that the (d,k) pair is valid +// (that is, -d<=k<=d and d+k even) +func checkDK(D, k int) { + if k >= -D && k <= D && (D+k)%2 == 0 { + return + } + panic(fmt.Sprintf("out of range, d=%d,k=%d", D, k)) +} + +func (t *label) set(D, k, x int) { + if debug { + checkDK(D, k) + } + for len(t.vec) <= D { + t.vec = append(t.vec, nil) + } + if t.vec[D] == nil { + t.vec[D] = make([]int, D+1) + } + t.vec[D][(D+k)/2] = x // known that D+k is even +} + +func (t *label) get(d, k int) int { + if debug { + checkDK(d, k) + } + return int(t.vec[d][(d+k)/2]) +} + +func newtriang(limit int) label { + if limit < 100 { + // Preallocate if limit is not large. 
+ return label{vec: make([][]int, limit)} + } + return label{} +} diff --git a/internal/lsp/diff/lcs/old.go b/internal/lsp/diff/lcs/old.go new file mode 100644 index 00000000000..a091edd5501 --- /dev/null +++ b/internal/lsp/diff/lcs/old.go @@ -0,0 +1,530 @@ +// Copyright 2022 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package lcs + +import ( + "fmt" + "strings" +) + +// Non-generic code. The names have Old at the end to indicate they are +// the implementation that doesn't use generics. + +// Compute returns the Diffs and the lcs of a and b, found by the two-sided algorithm with D bounded by limit. +func Compute(a, b interface{}, limit int) ([]Diff, lcs) { + var ans lcs + g := newegraph(a, b, limit) + ans = g.twosided() + diffs := g.fromlcs(ans) + return diffs, ans +} + +// editGraph carries the information for computing the lcs for []byte, []rune, or []string. +type editGraph struct { + eq eq // how to compare elements of A, B, and convert slices to strings + vf, vb label // forward and backward labels + + limit int // maximal value of D + // the bounding rectangle of the current edit graph + lx, ly, ux, uy int + delta int // common subexpression: (ux-lx)-(uy-ly) +} + +// abstraction in place of generic +type eq interface { + eq(i, j int) bool + substr(i, j int) string // string from b[i:j] + lena() int + lenb() int +} + +type byteeq struct { + a, b []byte // the input was ascii.
perhaps these could be strings +} + +func (x *byteeq) eq(i, j int) bool { return x.a[i] == x.b[j] } +func (x *byteeq) substr(i, j int) string { return string(x.b[i:j]) } +func (x *byteeq) lena() int { return int(len(x.a)) } +func (x *byteeq) lenb() int { return int(len(x.b)) } + +type runeeq struct { + a, b []rune +} + +func (x *runeeq) eq(i, j int) bool { return x.a[i] == x.b[j] } +func (x *runeeq) substr(i, j int) string { return string(x.b[i:j]) } +func (x *runeeq) lena() int { return int(len(x.a)) } +func (x *runeeq) lenb() int { return int(len(x.b)) } + +type lineeq struct { + a, b []string +} + +func (x *lineeq) eq(i, j int) bool { return x.a[i] == x.b[j] } +func (x *lineeq) substr(i, j int) string { return strings.Join(x.b[i:j], "") } +func (x *lineeq) lena() int { return int(len(x.a)) } +func (x *lineeq) lenb() int { return int(len(x.b)) } + +func neweq(a, b interface{}) eq { + switch x := a.(type) { + case []byte: + return &byteeq{a: x, b: b.([]byte)} + case []rune: + return &runeeq{a: x, b: b.([]rune)} + case []string: + return &lineeq{a: x, b: b.([]string)} + default: + panic(fmt.Sprintf("unexpected type %T in neweq", x)) + } +} + +func (g *editGraph) fromlcs(lcs lcs) []Diff { + var ans []Diff + var pa, pb int // offsets in a, b + for _, l := range lcs { + if pa < l.X && pb < l.Y { + ans = append(ans, Diff{pa, l.X, g.eq.substr(pb, l.Y)}) + } else if pa < l.X { + ans = append(ans, Diff{pa, l.X, ""}) + } else if pb < l.Y { + ans = append(ans, Diff{pa, l.X, g.eq.substr(pb, l.Y)}) + } + pa = l.X + l.Len + pb = l.Y + l.Len + } + if pa < g.eq.lena() && pb < g.eq.lenb() { + ans = append(ans, Diff{pa, g.eq.lena(), g.eq.substr(pb, g.eq.lenb())}) + } else if pa < g.eq.lena() { + ans = append(ans, Diff{pa, g.eq.lena(), ""}) + } else if pb < g.eq.lenb() { + ans = append(ans, Diff{pa, g.eq.lena(), g.eq.substr(pb, g.eq.lenb())}) + } + return ans +} + +func newegraph(a, b interface{}, limit int) *editGraph { + if limit <= 0 { + limit = 1 << 25 // effectively infinity + 
} + var alen, blen int + switch a := a.(type) { + case []byte: + alen, blen = len(a), len(b.([]byte)) + case []rune: + alen, blen = len(a), len(b.([]rune)) + case []string: + alen, blen = len(a), len(b.([]string)) + default: + panic(fmt.Sprintf("unexpected type %T in newegraph", a)) + } + ans := &editGraph{eq: neweq(a, b), vf: newtriang(limit), vb: newtriang(limit), limit: int(limit), + ux: alen, uy: blen, delta: alen - blen} + return ans +} + +// --- FORWARD --- +// fdone decides if the forward path has reached the upper right +// corner of the rectangle. If so, it also returns the computed lcs. +func (e *editGraph) fdone(D, k int) (bool, lcs) { + // x, y, k are relative to the rectangle + x := e.vf.get(D, k) + y := x - k + if x == e.ux && y == e.uy { + return true, e.forwardlcs(D, k) + } + return false, nil +} + +// forward runs the forward algorithm, until success or up to the limit on D. +func (e *editGraph) forward() lcs { + e.setForward(0, 0, e.lx) + if ok, ans := e.fdone(0, 0); ok { + return ans + } + // from D to D+1 + for D := 0; D < e.limit; D++ { + e.setForward(D+1, -(D + 1), e.getForward(D, -D)) + if ok, ans := e.fdone(D+1, -(D + 1)); ok { + return ans + } + e.setForward(D+1, D+1, e.getForward(D, D)+1) + if ok, ans := e.fdone(D+1, D+1); ok { + return ans + } + for k := -D + 1; k <= D-1; k += 2 { + // these are tricky and easy to get backwards + lookv := e.lookForward(k, e.getForward(D, k-1)+1) + lookh := e.lookForward(k, e.getForward(D, k+1)) + if lookv > lookh { + e.setForward(D+1, k, lookv) + } else { + e.setForward(D+1, k, lookh) + } + if ok, ans := e.fdone(D+1, k); ok { + return ans + } + } + } + // D is too large + // find the D path with maximal x+y inside the rectangle and + // use that to compute the found part of the lcs + kmax := -e.limit - 1 + diagmax := -1 + for k := -e.limit; k <= e.limit; k += 2 { + x := e.getForward(e.limit, k) + y := x - k + if x+y > diagmax && x <= e.ux && y <= e.uy { + diagmax, kmax = x+y, k + } + } + return
e.forwardlcs(e.limit, kmax) +} + +// recover the lcs by backtracking from the farthest point reached +func (e *editGraph) forwardlcs(D, k int) lcs { + var ans lcs + for x := e.getForward(D, k); x != 0 || x-k != 0; { + if ok(D-1, k-1) && x-1 == e.getForward(D-1, k-1) { + // if (x-1,y) is labelled D-1, x--,D--,k--,continue + D, k, x = D-1, k-1, x-1 + continue + } else if ok(D-1, k+1) && x == e.getForward(D-1, k+1) { + // if (x,y-1) is labelled D-1, x, D--,k++, continue + D, k = D-1, k+1 + continue + } + // if (x-1,y-1)--(x,y) is a diagonal, prepend,x--,y--, continue + y := x - k + realx, realy := x+e.lx, y+e.ly + if e.eq.eq(realx-1, realy-1) { + ans = prependlcs(ans, realx-1, realy-1) + x-- + } else { + panic("broken path") + } + } + return ans +} + +// start at (x,y), go up the diagonal as far as possible, +// and label the result with d +func (e *editGraph) lookForward(k, relx int) int { + rely := relx - k + x, y := relx+e.lx, rely+e.ly + for x < e.ux && y < e.uy && e.eq.eq(x, y) { + x++ + y++ + } + return x +} + +func (e *editGraph) setForward(d, k, relx int) { + x := e.lookForward(k, relx) + e.vf.set(d, k, x-e.lx) +} + +func (e *editGraph) getForward(d, k int) int { + x := e.vf.get(d, k) + return x +} + +// --- BACKWARD --- +// bdone decides if the backward path has reached the lower left corner +func (e *editGraph) bdone(D, k int) (bool, lcs) { + // x, y, k are relative to the rectangle + x := e.vb.get(D, k) + y := x - (k + e.delta) + if x == 0 && y == 0 { + return true, e.backwardlcs(D, k) + } + return false, nil +} + +// run the backward algorithm, until success or up to the limit on D. 
+func (e *editGraph) backward() lcs { + e.setBackward(0, 0, e.ux) + if ok, ans := e.bdone(0, 0); ok { + return ans + } + // from D to D+1 + for D := 0; D < e.limit; D++ { + e.setBackward(D+1, -(D + 1), e.getBackward(D, -D)-1) + if ok, ans := e.bdone(D+1, -(D + 1)); ok { + return ans + } + e.setBackward(D+1, D+1, e.getBackward(D, D)) + if ok, ans := e.bdone(D+1, D+1); ok { + return ans + } + for k := -D + 1; k <= D-1; k += 2 { + // these are tricky and easy to get wrong + lookv := e.lookBackward(k, e.getBackward(D, k-1)) + lookh := e.lookBackward(k, e.getBackward(D, k+1)-1) + if lookv < lookh { + e.setBackward(D+1, k, lookv) + } else { + e.setBackward(D+1, k, lookh) + } + if ok, ans := e.bdone(D+1, k); ok { + return ans + } + } + } + + // D is too large + // find the D path with minimal x+y inside the rectangle and + // use that to compute the part of the lcs found + kmax := -e.limit - 1 + diagmin := 1 << 25 + for k := -e.limit; k <= e.limit; k += 2 { + x := e.getBackward(e.limit, k) + y := x - (k + e.delta) + if x+y < diagmin && x >= 0 && y >= 0 { + diagmin, kmax = x+y, k + } + } + if kmax < -e.limit { + panic(fmt.Sprintf("no paths when limit=%d?", e.limit)) + } + return e.backwardlcs(e.limit, kmax) +} + +// recover the lcs by backtracking +func (e *editGraph) backwardlcs(D, k int) lcs { + var ans lcs + for x := e.getBackward(D, k); x != e.ux || x-(k+e.delta) != e.uy; { + if ok(D-1, k-1) && x == e.getBackward(D-1, k-1) { + // D--, k--, x unchanged + D, k = D-1, k-1 + continue + } else if ok(D-1, k+1) && x+1 == e.getBackward(D-1, k+1) { + // D--, k++, x++ + D, k, x = D-1, k+1, x+1 + continue + } + y := x - (k + e.delta) + realx, realy := x+e.lx, y+e.ly + if e.eq.eq(realx, realy) { + ans = appendlcs(ans, realx, realy) + x++ + } else { + panic("broken path") + } + } + return ans +} + +// start at (x,y), go down the diagonal as far as possible, +func (e *editGraph) lookBackward(k, relx int) int { + rely := relx - (k + e.delta) // forward k = k + e.delta + x, y := 
relx+e.lx, rely+e.ly + for x > 0 && y > 0 && e.eq.eq(x-1, y-1) { + x-- + y-- + } + return x +} + +// convert to rectangle, and label the result with d +func (e *editGraph) setBackward(d, k, relx int) { + x := e.lookBackward(k, relx) + e.vb.set(d, k, x-e.lx) +} + +func (e *editGraph) getBackward(d, k int) int { + x := e.vb.get(d, k) + return x +} + +// -- TWOSIDED --- + +func (e *editGraph) twosided() lcs { + // The termination condition could be improved, as either the forward + // or backward pass could succeed before Myers' Lemma applies. + // Aside from questions of efficiency (is the extra testing cost-effective) + // this is more likely to matter when e.limit is reached. + e.setForward(0, 0, e.lx) + e.setBackward(0, 0, e.ux) + + // from D to D+1 + for D := 0; D < e.limit; D++ { + // just finished a backwards pass, so check + if got, ok := e.twoDone(D, D); ok { + return e.twolcs(D, D, got) + } + // do a forwards pass (D to D+1) + e.setForward(D+1, -(D + 1), e.getForward(D, -D)) + e.setForward(D+1, D+1, e.getForward(D, D)+1) + for k := -D + 1; k <= D-1; k += 2 { + // these are tricky and easy to get backwards + lookv := e.lookForward(k, e.getForward(D, k-1)+1) + lookh := e.lookForward(k, e.getForward(D, k+1)) + if lookv > lookh { + e.setForward(D+1, k, lookv) + } else { + e.setForward(D+1, k, lookh) + } + } + // just did a forward pass, so check + if got, ok := e.twoDone(D+1, D); ok { + return e.twolcs(D+1, D, got) + } + // do a backward pass, D to D+1 + e.setBackward(D+1, -(D + 1), e.getBackward(D, -D)-1) + e.setBackward(D+1, D+1, e.getBackward(D, D)) + for k := -D + 1; k <= D-1; k += 2 { + // these are tricky and easy to get wrong + lookv := e.lookBackward(k, e.getBackward(D, k-1)) + lookh := e.lookBackward(k, e.getBackward(D, k+1)-1) + if lookv < lookh { + e.setBackward(D+1, k, lookv) + } else { + e.setBackward(D+1, k, lookh) + } + } + } + + // D too large. 
combine a forward and backward partial lcs + // first, a forward one + kmax := -e.limit - 1 + diagmax := -1 + for k := -e.limit; k <= e.limit; k += 2 { + x := e.getForward(e.limit, k) + y := x - k + if x+y > diagmax && x <= e.ux && y <= e.uy { + diagmax, kmax = x+y, k + } + } + if kmax < -e.limit { + panic(fmt.Sprintf("no forward paths when limit=%d?", e.limit)) + } + lcs := e.forwardlcs(e.limit, kmax) + // now a backward one: find the D path with minimal x+y inside the + // rectangle and use that to compute the lcs + kmax = -e.limit - 1 // reset, so the check below can detect a missing backward path + diagmin := 1 << 25 // infinity + for k := -e.limit; k <= e.limit; k += 2 { + x := e.getBackward(e.limit, k) + y := x - (k + e.delta) + if x+y < diagmin && x >= 0 && y >= 0 { + diagmin, kmax = x+y, k + } + } + if kmax < -e.limit { + panic(fmt.Sprintf("no backward paths when limit=%d?", e.limit)) + } + lcs = append(lcs, e.backwardlcs(e.limit, kmax)...) + // These may overlap (e.forwardlcs and e.backwardlcs return sorted lcs) + ans := lcs.fix() + return ans +} + +// twoDone reports whether Myers' Lemma applies to the df forward labels and db backward labels; if so it also returns a forward diagonal on which u <= x. +func (e *editGraph) twoDone(df, db int) (int, bool) { + if (df+db+e.delta)%2 != 0 { + return 0, false // diagonals cannot overlap + } + kmin := -db + e.delta + if -df > kmin { + kmin = -df + } + kmax := db + e.delta + if df < kmax { + kmax = df + } + for k := kmin; k <= kmax; k += 2 { + x := e.vf.get(df, k) + u := e.vb.get(db, k-e.delta) + if u <= x { + // is it worth looking at all the other k? + for l := k; l <= kmax; l += 2 { + x := e.vf.get(df, l) + y := x - l + u := e.vb.get(db, l-e.delta) + v := u - l + if x == u || u == 0 || v == 0 || y == e.uy || x == e.ux { + return l, true + } + } + return k, true + } + } + return 0, false +} + +func (e *editGraph) twolcs(df, db, kf int) lcs { + // db==df || db+1==df + x := e.vf.get(df, kf) + y := x - kf + kb := kf - e.delta + u := e.vb.get(db, kb) + v := u - kf + + // Myers proved there is a df-path from (0,0) to (u,v) + // and a db-path from (x,y) to (N,M).
+ // In the first case the overall path is the forward path + // to (u,v) followed by the backward path to (N,M). + // In the second case the path is the backward path to (x,y) + // followed by the forward path to (x,y) from (0,0). + + // Look for some special cases to avoid computing either of these paths. + if x == u { + // "babaab" "cccaba" + // already patched together + lcs := e.forwardlcs(df, kf) + lcs = append(lcs, e.backwardlcs(db, kb)...) + return lcs.sort() + } + + // is (u-1,v) or (u,v-1) labelled df-1? + // if so, that forward df-1-path plus a horizontal or vertical edge + // is the df-path to (u,v), then plus the db-path to (N,M) + if u > 0 && ok(df-1, u-1-v) && e.vf.get(df-1, u-1-v) == u-1 { + // "aabbab" "cbcabc" + lcs := e.forwardlcs(df-1, u-1-v) + lcs = append(lcs, e.backwardlcs(db, kb)...) + return lcs.sort() + } + if v > 0 && ok(df-1, (u-(v-1))) && e.vf.get(df-1, u-(v-1)) == u { + // "abaabb" "bcacab" + lcs := e.forwardlcs(df-1, u-(v-1)) + lcs = append(lcs, e.backwardlcs(db, kb)...) + return lcs.sort() + } + + // The path can't possibly contribute to the lcs because it + // is all horizontal or vertical edges + if u == 0 || v == 0 || x == e.ux || y == e.uy { + // "abaabb" "abaaaa" + if u == 0 || v == 0 { + return e.backwardlcs(db, kb) + } + return e.forwardlcs(df, kf) + } + + // is (x+1,y) or (x,y+1) labelled db-1? + if x+1 <= e.ux && ok(db-1, x+1-y-e.delta) && e.vb.get(db-1, x+1-y-e.delta) == x+1 { + // "bababb" "baaabb" + lcs := e.backwardlcs(db-1, kb+1) + lcs = append(lcs, e.forwardlcs(df, kf)...) + return lcs.sort() + } + if y+1 <= e.uy && ok(db-1, x-(y+1)-e.delta) && e.vb.get(db-1, x-(y+1)-e.delta) == x { + // "abbbaa" "cabacc" + lcs := e.backwardlcs(db-1, kb-1) + lcs = append(lcs, e.forwardlcs(df, kf)...) + return lcs.sort() + } + + // need to compute another path + // "aabbaa" "aacaba" + lcs := e.backwardlcs(db, kb) + oldx, oldy := e.ux, e.uy + e.ux = u + e.uy = v + lcs = append(lcs, e.forward()...) 
+	e.ux, e.uy = oldx, oldy
+	return lcs.sort()
+}
diff --git a/internal/lsp/diff/lcs/old_test.go b/internal/lsp/diff/lcs/old_test.go
new file mode 100644
index 00000000000..ba22fe6f461
--- /dev/null
+++ b/internal/lsp/diff/lcs/old_test.go
@@ -0,0 +1,203 @@
+// Copyright 2022 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package lcs
+
+import (
+	"math/rand"
+	"testing"
+)
+
+func TestForwardOld(t *testing.T) { // forward search on (a,b) and (b,a)
+	for _, tx := range Btests {
+		lim := len(tx.a) + len(tx.b)
+		left, right := []byte(tx.a), []byte(tx.b)
+		g := newegraph(left, right, lim)
+		lcs := g.forward()
+		diffs := g.fromlcs(lcs)
+		check(t, tx.a, lcs, tx.lcs)
+		checkDiffs(t, tx.a, diffs, tx.b)
+
+		g = newegraph(right, left, lim)
+		lcs = g.forward()
+		diffs = g.fromlcs(lcs)
+		check(t, tx.b, lcs, tx.lcs)
+		checkDiffs(t, tx.b, diffs, tx.a)
+	}
+}
+
+func TestBackwardOld(t *testing.T) { // backward search on (a,b) and (b,a)
+	for _, tx := range Btests {
+		lim := len(tx.a) + len(tx.b)
+		left, right := []byte(tx.a), []byte(tx.b)
+		g := newegraph(left, right, lim)
+		lcs := g.backward()
+		check(t, tx.a, lcs, tx.lcs)
+		diffs := g.fromlcs(lcs)
+		checkDiffs(t, tx.a, diffs, tx.b)
+
+		g = newegraph(right, left, lim)
+		lcs = g.backward()
+		diffs = g.fromlcs(lcs)
+		check(t, tx.b, lcs, tx.lcs)
+		checkDiffs(t, tx.b, diffs, tx.a)
+	}
+}
+
+func TestTwosidedOld(t *testing.T) {
+	// test both (a,b) and (b,a)
+	for _, tx := range Btests {
+		left, right := []byte(tx.a), []byte(tx.b)
+		lim := len(tx.a) + len(tx.b)
+		diffs, lcs := Compute(left, right, lim)
+		check(t, tx.a, lcs, tx.lcs)
+		checkDiffs(t, tx.a, diffs, tx.b)
+		diffs, lcs = Compute(right, left, lim)
+		check(t, tx.b, lcs, tx.lcs)
+		checkDiffs(t, tx.b, diffs, tx.a)
+	}
+}
+
+func TestIntOld(t *testing.T) {
+	// the fill strings must not contain any characters used in Btests
+	lfill, rfill := "AAAAAAAAAAAA", "BBBBBBBBBBBB"
+	for _, tx := range Btests {
+		if len(tx.a) < 2 || len(tx.b) < 2 {
+			continue
+		}
+		left := []byte(tx.a + lfill)
+		right := []byte(tx.b + rfill)
+		lim := len(tx.a) + len(tx.b)
+		diffs, lcs := Compute(left, right, lim)
+		check(t, string(left), lcs, tx.lcs)
+		checkDiffs(t, string(left), diffs, string(right))
+		diffs, lcs = Compute(right, left, lim)
+		check(t, string(right), lcs, tx.lcs)
+		checkDiffs(t, string(right), diffs, string(left))
+
+		left = []byte(lfill + tx.a)
+		right = []byte(rfill + tx.b)
+		diffs, lcs = Compute(left, right, lim)
+		check(t, string(left), lcs, tx.lcs)
+		checkDiffs(t, string(left), diffs, string(right))
+		diffs, lcs = Compute(right, left, lim)
+		check(t, string(right), lcs, tx.lcs)
+		checkDiffs(t, string(right), diffs, string(left))
+	}
+}
+
+func TestSpecialOld(t *testing.T) { // needs lcs.fix
+	a := []byte("golang.org/x/tools/intern")
+	b := []byte("github.com/google/safehtml/template\"\n\t\"golang.org/x/tools/intern")
+	diffs, lcs := Compute(a, b, 4)
+	if !lcs.valid() {
+		t.Errorf("%d,%v", len(diffs), lcs)
+	}
+}
+
+func TestRegressionOld001(t *testing.T) {
+	a := "// Copyright 2019 The Go Authors. All rights reserved.\n// Use of this source code is governed by a BSD-style\n// license that can be found in the LICENSE file.\n\npackage diff_test\n\nimport (\n\t\"fmt\"\n\t\"math/rand\"\n\t\"strings\"\n\t\"testing\"\n\n\t\"golang.org/x/tools/internal/lsp/diff\"\n\t\"golang.org/x/tools/internal/lsp/diff/difftest\"\n\t\"golang.org/x/tools/internal/span\"\n)\n"
+
+	b := "// Copyright 2019 The Go Authors. All rights reserved.\n// Use of this source code is governed by a BSD-style\n// license that can be found in the LICENSE file.\n\npackage diff_test\n\nimport (\n\t\"fmt\"\n\t\"math/rand\"\n\t\"strings\"\n\t\"testing\"\n\n\t\"github.com/google/safehtml/template\"\n\t\"golang.org/x/tools/internal/lsp/diff\"\n\t\"golang.org/x/tools/internal/lsp/diff/difftest\"\n\t\"golang.org/x/tools/internal/span\"\n)\n"
+	for i := 1; i < len(b); i++ {
+		diffs, lcs := Compute([]byte(a), []byte(b), i) // 14 from gopls
+		if !lcs.valid() {
+			t.Errorf("%d,%v", len(diffs), lcs)
+		}
+		checkDiffs(t, a, diffs, b)
+	}
+}
+
+func TestRegressionOld002(t *testing.T) {
+	a := "n\"\n)\n"
+	b := "n\"\n\t\"golang.org/x//nnal/stack\"\n)\n"
+	for i := 1; i <= len(b); i++ {
+		diffs, lcs := Compute([]byte(a), []byte(b), i)
+		if !lcs.valid() {
+			t.Errorf("%d,%v", len(diffs), lcs)
+		}
+		checkDiffs(t, a, diffs, b)
+	}
+}
+
+func TestRegressionOld003(t *testing.T) {
+	a := "golang.org/x/hello v1.0.0\nrequire golang.org/x/unused v1"
+	b := "golang.org/x/hello v1"
+	for i := 1; i <= len(a); i++ {
+		diffs, lcs := Compute([]byte(a), []byte(b), i)
+		if !lcs.valid() {
+			t.Errorf("%d,%v", len(diffs), lcs)
+		}
+		checkDiffs(t, a, diffs, b)
+	}
+}
+
+func TestRandOld(t *testing.T) { // the three searches must agree on lcs length
+	rand.Seed(1)
+	for i := 0; i < 1000; i++ {
+		a := []rune(randstr("abω", 16))
+		b := []rune(randstr("abωc", 16))
+		g := newegraph(a, b, 24) // large enough to get true lcs
+		two := g.twosided()
+		forw := g.forward()
+		back := g.backward()
+		if lcslen(two) != lcslen(forw) || lcslen(forw) != lcslen(back) {
+			t.Logf("\n%v\n%v\n%v", forw, back, two)
+			t.Fatalf("%d forw:%d back:%d two:%d", i, lcslen(forw), lcslen(back), lcslen(two))
+		}
+		if !two.valid() || !forw.valid() || !back.valid() {
+			t.Errorf("check failure")
+		}
+	}
+}
+
+func BenchmarkTwoOld(b *testing.B) {
+	tests := genBench("abc", 96)
+	for i := 0; i < b.N; i++ {
+		for _, tt := range tests {
+			_, two := Compute([]byte(tt.before), []byte(tt.after), 100)
+			if !two.valid() {
+				b.Error("check failed")
+			}
+		}
+	}
+}
+
+func BenchmarkForwOld(b *testing.B) { // forward search only, as in TestForwardOld
+	tests := genBench("abc", 96)
+	for i := 0; i < b.N; i++ {
+		for _, tt := range tests {
+			g := newegraph([]byte(tt.before), []byte(tt.after), 100)
+			if forw := g.forward(); !forw.valid() {
+				b.Error("check failed")
+			}
+		}
+	}
+}
+
+func genBench(set string, n int) []struct{ before, after string } {
+	// genBench makes before/after pairs for benchmarks: 24 strings of length n,
+	// each after differing from its before at least once, and in about 5% of runes
+	rand.Seed(3)
+	var ans []struct{ before, after string }
+	for i := 0; i < 24; i++ {
+		// maybe b should have an approximately known number of diffs
+		a := randstr(set, n)
+		cnt := 0
+		bb := make([]rune, 0, n)
+		for _, r := range a {
+			if rand.Float64() < .05 {
+				cnt++
+				r = 'N'
+			}
+			bb = append(bb, r)
+		}
+		if cnt == 0 {
+			// avoid == shortcut
+			bb[n/2] = 'N'
+		}
+		ans = append(ans, struct{ before, after string }{a, string(bb)})
+	}
+	return ans
+}
diff --git a/internal/lsp/diff/ndiff.go b/internal/lsp/diff/ndiff.go
new file mode 100644
index 00000000000..8f7732dd019
--- /dev/null
+++ b/internal/lsp/diff/ndiff.go
@@ -0,0 +1,130 @@
+// Copyright 2022 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package diff
+
+import (
+	"strings"
+	"unicode/utf8"
+
+	"golang.org/x/tools/internal/lsp/diff/lcs"
+	"golang.org/x/tools/internal/span"
+)
+
+// maxDiffs is a limit on how deeply the lcs algorithm should search.
+// The value is just a guess; callers below pass maxDiffs/2 to lcs.Compute.
+const maxDiffs = 30
+
+// NComputeEdits computes the TextEdits that turn before into after.
+// (Both it and the diff in the myers package have type ComputeEdits, which
+// is why the arguments are strings, not []bytes.)
+func NComputeEdits(uri span.URI, before, after string) ([]TextEdit, error) {
+	if before == after {
+		// very frequently true
+		return nil, nil
+	}
+	// the diffs returned by the lcs package use indexes into whatever slice
+	// was passed in. TextEdits need a span.Span which is computed with
+	// byte offsets, so rune or line offsets need to be converted.
+	if needrunes(before) || needrunes(after) {
+		// NOTE(review): assumes valid UTF-8; []rune maps bad bytes to U+FFFD -- confirm
+		rs := []rune(before) // converted once, shared by Compute and runeOffsets
+		diffs, _ := lcs.Compute(rs, []rune(after), maxDiffs/2)
+		return convertDiffs(uri, runeOffsets(diffs, rs), []byte(before))
+	}
+	// neither string has a byte >= utf8.RuneSelf, so indexes are byte offsets
+	diffs, _ := lcs.Compute([]byte(before), []byte(after), maxDiffs/2)
+	return convertDiffs(uri, diffs, []byte(before))
+}
+
+// NComputeLineEdits computes TextEdits for []strings. Line offsets are turned
+// into byte offsets into strJoin(before), which is also the text positioned.
+func NComputeLineEdits(uri span.URI, before, after []string) ([]TextEdit, error) {
+	diffs, _ := lcs.Compute(before, after, maxDiffs/2)
+	diffs = lineOffsets(diffs, before)
+	// the code is not coping with possible missing \ns at the ends
+	return convertDiffs(uri, diffs, []byte(strJoin(before)))
+}
+
+// convertDiffs converts diffs with byte offsets into src into TextEdits,
+// using WithPosition on a token file built from src to add line and column.
+func convertDiffs(uri span.URI, diffs []lcs.Diff, src []byte) ([]TextEdit, error) {
+	ans := make([]TextEdit, len(diffs))
+	tf := span.NewTokenFile(uri.Filename(), src)
+	for i, d := range diffs {
+		s := newSpan(uri, d.Start, d.End)
+		s, err := s.WithPosition(tf)
+		if err != nil {
+			return nil, err
+		}
+		ans[i] = TextEdit{s, d.Text}
+	}
+	return ans, nil
+}
+
+// runeOffsets converts diffs with rune offsets into src into diffs with byte
+// offsets into the UTF-8 encoding of src. It walks diffs in order, so each
+// Start must be at or after the previous End; diffs is updated in place.
+func runeOffsets(diffs []lcs.Diff, src []rune) []lcs.Diff {
+	var idx, n int // idx: runes consumed so far; n: their encoded length in bytes
+	for i, d := range diffs {
+		n += len(string(src[idx:d.Start])) // string because []byte([]rune) is illegal
+		start := n
+		n += len(string(src[d.Start:d.End]))
+		idx = d.End
+		d.Start, d.End = start, n
+		diffs[i] = d
+	}
+	return diffs
+}
+
+// lineOffsets converts diffs with line offsets into src into diffs with byte
+// offsets into strJoin(src). It walks diffs in order, so each Start must be
+// at or after the previous End; diffs is updated in place and returned.
+func lineOffsets(diffs []lcs.Diff, src []string) []lcs.Diff {
+	var idx, n int // idx: lines consumed so far; n: their total length in bytes
+	for i, d := range diffs {
+		n += len(strJoin(src[idx:d.Start])) // unchanged lines before this diff
+		start := n
+		n += len(strJoin(src[d.Start:d.End])) // lines covered by this diff
+		idx = d.End
+		d.Start, d.End = start, n
+		diffs[i] = d
+	}
+	return diffs
+}
+
+// strJoin concatenates elems with no separator, like strings.Join(elems, "").
+// Presumably each line already ends with its own \n -- TODO confirm with callers.
+func strJoin(elems []string) string {
+	n := 0
+	for _, s := range elems {
+		n += len(s)
+	}
+	var b strings.Builder
+	b.Grow(n) // one allocation for the whole result
+	for _, s := range elems {
+		b.WriteString(s)
+	}
+	return b.String()
+}
+
+// newSpan returns a span for uri between the offsets left and right.
+// Both points are created with 0 for their first two arguments, so only the
+// offsets are meaningful until the caller applies WithPosition to the span.
+func newSpan(uri span.URI, left, right int) span.Span {
+	return span.New(uri, span.NewPoint(0, 0, left), span.NewPoint(0, 0, right))
+}
+
+// needrunes reports whether s contains any byte >= utf8.RuneSelf, that is,
+// whether it has non-ASCII content. Such a string has to be converted to
+// []rune for random access by rune; an ASCII string can be indexed as bytes.
+func needrunes(s string) bool {
+	for i := 0; i < len(s); i++ {
+		if s[i] >= utf8.RuneSelf {
+			return true
+		}
+	}
+	return false
+}
diff --git a/internal/lsp/source/options.go b/internal/lsp/source/options.go
index c386eee45e7..8fa78ae2610 100644
--- a/internal/lsp/source/options.go
+++ b/internal/lsp/source/options.go
@@ -477,7 +477,7 @@ type Hooks struct {
 	// LicensesText holds third party licenses for software used by gopls.
 	LicensesText string
 
-	// TODO(rfindley): is this even necessary?
+	// GoDiff is used in gopls/hooks to get Myers' diff
 	GoDiff bool
 
 	// Whether staticcheck is supported.
@@ -562,6 +562,13 @@ type InternalOptions struct {
 	// on the server.
 	// This option applies only during initialization.
 	ShowBugReports bool
+
+	// NewDiff controls the choice of the new diff implementation.
+	// It can be 'old', 'new', or 'both'; the default is 'both'.
+	// 'both' computes diffs with both algorithms, checks
+	// that the new algorithm has worked, and writes some summary
+	// statistics to a file in os.TempDir().
+	NewDiff string
 }
 
 type ImportShortcut string
@@ -1059,6 +1066,9 @@ func (o *Options) set(name string, value interface{}, seen map[string]struct{})
 		// This setting should be handled before all of the other options are
 		// processed, so do nothing here.
 
+	case "newDiff":
+		result.setString(&o.NewDiff)
+
 	// Replaced settings.
 	case "experimentalDisabledAnalyses":
 		result.deprecated("analyses")
diff --git a/internal/lsp/tests/tests.go b/internal/lsp/tests/tests.go
index ec804e5e79e..b60fbf03866 100644
--- a/internal/lsp/tests/tests.go
+++ b/internal/lsp/tests/tests.go
@@ -265,6 +265,7 @@ func DefaultOptions(o *source.Options) {
 	o.HierarchicalDocumentSymbolSupport = true
 	o.ExperimentalWorkspaceModule = true
 	o.SemanticTokens = true
+	o.InternalOptions.NewDiff = "both"
 }
 
 func RunTests(t *testing.T, dataDir string, includeMultiModule bool, f func(*testing.T, *Data)) {