diff --git a/digestset/set.go b/digestset/set.go
new file mode 100644
index 0000000..71f2418
--- /dev/null
+++ b/digestset/set.go
@@ -0,0 +1,262 @@
+// Copyright 2020, 2020 OCI Contributors
+// Copyright 2017 Docker, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package digestset
+
+import (
+    "errors"
+    "sort"
+    "strings"
+    "sync"
+
+    digest "github.com/opencontainers/go-digest"
+)
+
+var (
+    // ErrDigestNotFound is used when a matching digest
+    // could not be found in a set.
+    ErrDigestNotFound = errors.New("digest not found")
+
+    // ErrDigestAmbiguous is used when multiple digests
+    // are found in a set. None of the matching digests
+    // should be considered valid matches.
+    ErrDigestAmbiguous = errors.New("ambiguous digest string")
+)
+
+// Set is used to hold a unique set of digests which
+// may be easily referenced by a string representation of
+// the digest as well as a short representation. The uniqueness
+// of the short representation is based on other digests in
+// the set. If digests are omitted from this set, collisions
+// in a larger set may not be detected, therefore it is
+// important to always do short representation lookups on
+// the complete set of digests. To mitigate collisions, an
+// appropriately long short code should be used.
+type Set struct {
+    mutex   sync.RWMutex
+    entries digestEntries
+}
+
+// NewSet creates an empty set of digests
+// which may have digests added.
+func NewSet() *Set {
+    return &Set{
+        entries: digestEntries{},
+    }
+}
+
+// checkShortMatch checks whether two digests match as either whole
+// values or short values. This function does not test equality,
+// but rather whether the second value could match against the first
+// value.
+func checkShortMatch(alg digest.Algorithm, hex, shortAlg, shortHex string) bool {
+    if len(hex) == len(shortHex) {
+        if hex != shortHex {
+            return false
+        }
+        if len(shortAlg) > 0 && string(alg) != shortAlg {
+            return false
+        }
+    } else if !strings.HasPrefix(hex, shortHex) {
+        return false
+    } else if len(shortAlg) > 0 && string(alg) != shortAlg {
+        return false
+    }
+    return true
+}
+
+// Lookup looks for a digest matching the given string representation.
+// If no digests could be found ErrDigestNotFound will be returned
+// with an empty digest value. If multiple matches are found
+// ErrDigestAmbiguous will be returned with an empty digest value.
+func (dst *Set) Lookup(d string) (digest.Digest, error) {
+    dst.mutex.RLock()
+    defer dst.mutex.RUnlock()
+    if len(dst.entries) == 0 {
+        return "", ErrDigestNotFound
+    }
+    var (
+        searchFunc func(int) bool
+        alg        digest.Algorithm
+        hex        string
+    )
+    dgst, err := digest.Parse(d)
+    if err == digest.ErrDigestInvalidFormat {
+        hex = d
+        searchFunc = func(i int) bool {
+            return dst.entries[i].val >= d
+        }
+    } else {
+        hex = dgst.Hex()
+        alg = dgst.Algorithm()
+        searchFunc = func(i int) bool {
+            if dst.entries[i].val == hex {
+                return dst.entries[i].alg >= alg
+            }
+            return dst.entries[i].val >= hex
+        }
+    }
+    idx := sort.Search(len(dst.entries), searchFunc)
+    if idx == len(dst.entries) || !checkShortMatch(dst.entries[idx].alg, dst.entries[idx].val, string(alg), hex) {
+        return "", ErrDigestNotFound
+    }
+    if dst.entries[idx].alg == alg && dst.entries[idx].val == hex {
+        return dst.entries[idx].digest, nil
+    }
+    if idx+1 < len(dst.entries) && checkShortMatch(dst.entries[idx+1].alg, dst.entries[idx+1].val, string(alg), hex) {
+        return "", ErrDigestAmbiguous
+    }
+
+    return dst.entries[idx].digest, nil
+}
+
+// Add adds the given digest to the set. An error will be returned
+// if the given digest is invalid. If the digest already exists in the
+// set, this operation will be a no-op.
+func (dst *Set) Add(d digest.Digest) error {
+    if err := d.Validate(); err != nil {
+        return err
+    }
+    dst.mutex.Lock()
+    defer dst.mutex.Unlock()
+    entry := &digestEntry{alg: d.Algorithm(), val: d.Hex(), digest: d}
+    searchFunc := func(i int) bool {
+        if dst.entries[i].val == entry.val {
+            return dst.entries[i].alg >= entry.alg
+        }
+        return dst.entries[i].val >= entry.val
+    }
+    idx := sort.Search(len(dst.entries), searchFunc)
+    if idx == len(dst.entries) {
+        dst.entries = append(dst.entries, entry)
+        return nil
+    } else if dst.entries[idx].digest == d {
+        return nil
+    }
+
+    entries := append(dst.entries, nil)
+    copy(entries[idx+1:], entries[idx:len(entries)-1])
+    entries[idx] = entry
+    dst.entries = entries
+    return nil
+}
+
+// Remove removes the given digest from the set. An error will be
+// returned if the given digest is invalid. If the digest does
+// not exist in the set, this operation will be a no-op.
+func (dst *Set) Remove(d digest.Digest) error {
+    if err := d.Validate(); err != nil {
+        return err
+    }
+    dst.mutex.Lock()
+    defer dst.mutex.Unlock()
+    entry := &digestEntry{alg: d.Algorithm(), val: d.Hex(), digest: d}
+    searchFunc := func(i int) bool {
+        if dst.entries[i].val == entry.val {
+            return dst.entries[i].alg >= entry.alg
+        }
+        return dst.entries[i].val >= entry.val
+    }
+    idx := sort.Search(len(dst.entries), searchFunc)
+    // Not found if idx is past the end or the entry at idx is not this digest
+    if idx == len(dst.entries) || dst.entries[idx].digest != d {
+        return nil
+    }
+
+    entries := dst.entries
+    copy(entries[idx:], entries[idx+1:])
+    entries = entries[:len(entries)-1]
+    dst.entries = entries
+
+    return nil
+}
+
+// All returns all the digests in the set
+func (dst *Set) All() []digest.Digest {
+    dst.mutex.RLock()
+    defer dst.mutex.RUnlock()
+    retValues := make([]digest.Digest, len(dst.entries))
+    for i := range dst.entries {
+        retValues[i] = dst.entries[i].digest
+    }
+
+    return retValues
+}
+
+// ShortCodeTable returns a map of Digest to unique short codes. The
+// length parameter is the minimum length of a short code; the maximum
+// length may be the entire digest value if uniqueness cannot be
+// achieved without the full value. This function will attempt to make
+// short codes as short as possible while remaining unique.
+func ShortCodeTable(dst *Set, length int) map[digest.Digest]string {
+    dst.mutex.RLock()
+    defer dst.mutex.RUnlock()
+    m := make(map[digest.Digest]string, len(dst.entries))
+    l := length
+    resetIdx := 0
+    for i := 0; i < len(dst.entries); i++ {
+        var short string
+        extended := true
+        for extended {
+            extended = false
+            if len(dst.entries[i].val) <= l {
+                short = dst.entries[i].digest.String()
+            } else {
+                short = dst.entries[i].val[:l]
+                for j := i + 1; j < len(dst.entries); j++ {
+                    if checkShortMatch(dst.entries[j].alg, dst.entries[j].val, "", short) {
+                        if j > resetIdx {
+                            resetIdx = j
+                        }
+                        extended = true
+                    } else {
+                        break
+                    }
+                }
+                if extended {
+                    l++
+                }
+            }
+        }
+        m[dst.entries[i].digest] = short
+        if i >= resetIdx {
+            l = length
+        }
+    }
+    return m
+}
+
+type digestEntry struct {
+    alg    digest.Algorithm
+    val    string
+    digest digest.Digest
+}
+
+type digestEntries []*digestEntry
+
+func (d digestEntries) Len() int {
+    return len(d)
+}
+
+func (d digestEntries) Less(i, j int) bool {
+    if d[i].val != d[j].val {
+        return d[i].val < d[j].val
+    }
+    return d[i].alg < d[j].alg
+}
+
+func (d digestEntries) Swap(i, j int) {
+    d[i], d[j] = d[j], d[i]
+}
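For reference only, and not part of the patch itself: a minimal sketch of how the new Set is meant to be used, assuming the package will be importable as github.com/opencontainers/go-digest/digestset (inferred from the file paths above). The two digest values are borrowed from the tests below. A prefix that is unique within the set resolves to the full digest, a prefix shared by several entries returns ErrDigestAmbiguous, and a prefix matching nothing returns ErrDigestNotFound.

    package main

    import (
        _ "crypto/sha256" // register sha256, mirroring the test imports below

        "fmt"

        digest "github.com/opencontainers/go-digest"
        "github.com/opencontainers/go-digest/digestset"
    )

    func main() {
        dset := digestset.NewSet()
        // Example values reused from the tests in this patch.
        for _, d := range []digest.Digest{
            "sha256:1234111111111111111111111111111111111111111111111111111111111111",
            "sha256:1234511111111111111111111111111111111111111111111111111111111111",
        } {
            if err := dset.Add(d); err != nil {
                panic(err)
            }
        }

        // A prefix that is unique within the set resolves to the full digest.
        if d, err := dset.Lookup("12341"); err == nil {
            fmt.Println(d)
        }

        // A prefix shared by more than one entry is rejected as ambiguous.
        if _, err := dset.Lookup("1234"); err == digestset.ErrDigestAmbiguous {
            fmt.Println("ambiguous prefix")
        }

        // A prefix matching nothing reports ErrDigestNotFound.
        if _, err := dset.Lookup("9999"); err == digestset.ErrDigestNotFound {
            fmt.Println("no such digest")
        }
    }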
diff --git a/digestset/set_test.go b/digestset/set_test.go
new file mode 100644
index 0000000..7a01c24
--- /dev/null
+++ b/digestset/set_test.go
@@ -0,0 +1,386 @@
+// Copyright 2020, 2020 OCI Contributors
+// Copyright 2017 Docker, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package digestset
+
+import (
+    "crypto/sha256"
+    _ "crypto/sha512"
+    "encoding/binary"
+    "math/rand"
+    "testing"
+
+    digest "github.com/opencontainers/go-digest"
+)
+
+func assertEqualDigests(t *testing.T, d1, d2 digest.Digest) {
+    if d1 != d2 {
+        t.Fatalf("Digests do not match:\n\tActual: %s\n\tExpected: %s", d1, d2)
+    }
+}
+
+func TestLookup(t *testing.T) {
+    digests := []digest.Digest{
+        "sha256:1234511111111111111111111111111111111111111111111111111111111111",
+        "sha256:1234111111111111111111111111111111111111111111111111111111111111",
+        "sha256:1234611111111111111111111111111111111111111111111111111111111111",
+        "sha256:5432111111111111111111111111111111111111111111111111111111111111",
+        "sha256:6543111111111111111111111111111111111111111111111111111111111111",
+        "sha256:6432111111111111111111111111111111111111111111111111111111111111",
+        "sha256:6542111111111111111111111111111111111111111111111111111111111111",
+        "sha256:6532111111111111111111111111111111111111111111111111111111111111",
+    }
+
+    dset := NewSet()
+    for i := range digests {
+        if err := dset.Add(digests[i]); err != nil {
+            t.Fatal(err)
+        }
+    }
+
+    dgst, err := dset.Lookup("54")
+    if err != nil {
+        t.Fatal(err)
+    }
+    assertEqualDigests(t, dgst, digests[3])
+
+    _, err = dset.Lookup("1234")
+    if err == nil {
+        t.Fatal("Expected ambiguous error looking up: 1234")
+    }
+    if err != ErrDigestAmbiguous {
+        t.Fatal(err)
+    }
+
+    _, err = dset.Lookup("9876")
+    if err == nil {
+        t.Fatal("Expected not found error looking up: 9876")
+    }
+    if err != ErrDigestNotFound {
+        t.Fatal(err)
+    }
+
+    _, err = dset.Lookup("sha256:1234")
+    if err == nil {
+        t.Fatal("Expected ambiguous error looking up: sha256:1234")
+    }
+    if err != ErrDigestAmbiguous {
+        t.Fatal(err)
+    }
+
+    dgst, err = dset.Lookup("sha256:12345")
+    if err != nil {
+        t.Fatal(err)
+    }
+    assertEqualDigests(t, dgst, digests[0])
+
+    dgst, err = dset.Lookup("sha256:12346")
+    if err != nil {
+        t.Fatal(err)
+    }
+    assertEqualDigests(t, dgst, digests[2])
+
+    dgst, err = dset.Lookup("12346")
+    if err != nil {
+        t.Fatal(err)
+    }
+    assertEqualDigests(t, dgst, digests[2])
+
+    dgst, err = dset.Lookup("12345")
+    if err != nil {
+        t.Fatal(err)
+    }
+    assertEqualDigests(t, dgst, digests[0])
+}
+
+func TestAddDuplication(t *testing.T) {
+    digests := []digest.Digest{
+        "sha256:1234111111111111111111111111111111111111111111111111111111111111",
+        "sha256:1234511111111111111111111111111111111111111111111111111111111111",
+        "sha256:1234611111111111111111111111111111111111111111111111111111111111",
+        "sha256:5432111111111111111111111111111111111111111111111111111111111111",
+        "sha256:6543111111111111111111111111111111111111111111111111111111111111",
+        "sha512:65431111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111",
+        "sha512:65421111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111",
+        "sha512:65321111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111",
+    }
+
+    dset := NewSet()
+    for i := range digests {
+        if err := dset.Add(digests[i]); err != nil {
+            t.Fatal(err)
+        }
+    }
+
+    if len(dset.entries) != 8 {
+        t.Fatal("Invalid dset size")
+    }
+
+    if err := dset.Add(digest.Digest("sha256:1234511111111111111111111111111111111111111111111111111111111111")); err != nil {
+        t.Fatal(err)
+    }
+
+    if len(dset.entries) != 8 {
+        t.Fatal("Duplicate digest insert should not increase entries size")
+    }
+
+    if err := dset.Add(digest.Digest("sha384:123451111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111")); err != nil {
+        t.Fatal(err)
+    }
+
+    if len(dset.entries) != 9 {
+        t.Fatal("Insert with different algorithm should be allowed")
+    }
+}
+
+func TestRemove(t *testing.T) {
+    digests, err := createDigests(10)
+    if err != nil {
+        t.Fatal(err)
+    }
+
+    dset := NewSet()
+    for i := range digests {
+        if err := dset.Add(digests[i]); err != nil {
+            t.Fatal(err)
+        }
+    }
+
+    dgst, err := dset.Lookup(digests[0].String())
+    if err != nil {
+        t.Fatal(err)
+    }
+    if dgst != digests[0] {
+        t.Fatalf("Unexpected digest value:\n\tExpected: %s\n\tActual: %s", digests[0], dgst)
+    }
+
+    if err := dset.Remove(digests[0]); err != nil {
+        t.Fatal(err)
+    }
+
+    if _, err := dset.Lookup(digests[0].String()); err != ErrDigestNotFound {
+        t.Fatalf("Expected error %v when looking up removed digest, got %v", ErrDigestNotFound, err)
+    }
+}
+
+func TestAll(t *testing.T) {
+    digests, err := createDigests(100)
+    if err != nil {
+        t.Fatal(err)
+    }
+
+    dset := NewSet()
+    for i := range digests {
+        if err := dset.Add(digests[i]); err != nil {
+            t.Fatal(err)
+        }
+    }
+
+    all := map[digest.Digest]struct{}{}
+    for _, dgst := range dset.All() {
+        all[dgst] = struct{}{}
+    }
+
+    if len(all) != len(digests) {
+        t.Fatalf("Unexpected number of unique digests found:\n\tExpected: %d\n\tActual: %d", len(digests), len(all))
+    }
+
+    for i, dgst := range digests {
+        if _, ok := all[dgst]; !ok {
+            t.Fatalf("Missing element at position %d: %s", i, dgst)
+        }
+    }
+
+}
+
+func assertEqualShort(t *testing.T, actual, expected string) {
+    if actual != expected {
+        t.Fatalf("Unexpected short value:\n\tExpected: %s\n\tActual: %s", expected, actual)
+    }
+}
+
+func TestShortCodeTable(t *testing.T) {
+    digests := []digest.Digest{
+        "sha256:1234111111111111111111111111111111111111111111111111111111111111",
+        "sha256:1234511111111111111111111111111111111111111111111111111111111111",
+        "sha256:1234611111111111111111111111111111111111111111111111111111111111",
+        "sha256:5432111111111111111111111111111111111111111111111111111111111111",
+        "sha256:6543111111111111111111111111111111111111111111111111111111111111",
+        "sha256:6432111111111111111111111111111111111111111111111111111111111111",
+        "sha256:6542111111111111111111111111111111111111111111111111111111111111",
+        "sha256:6532111111111111111111111111111111111111111111111111111111111111",
+    }
+
+    dset := NewSet()
+    for i := range digests {
+        if err := dset.Add(digests[i]); err != nil {
+            t.Fatal(err)
+        }
+    }
+
+    dump := ShortCodeTable(dset, 2)
+
+    if len(dump) < len(digests) {
+        t.Fatalf("Error unexpected size: %d, expecting %d", len(dump), len(digests))
+    }
+    assertEqualShort(t, dump[digests[0]], "12341")
+    assertEqualShort(t, dump[digests[1]], "12345")
+    assertEqualShort(t, dump[digests[2]], "12346")
+    assertEqualShort(t, dump[digests[3]], "54")
+    assertEqualShort(t, dump[digests[4]], "6543")
+    assertEqualShort(t, dump[digests[5]], "64")
+    assertEqualShort(t, dump[digests[6]], "6542")
+    assertEqualShort(t, dump[digests[7]], "653")
+}
+
+func createDigests(count int) ([]digest.Digest, error) {
+    r := rand.New(rand.NewSource(25823))
+    digests := make([]digest.Digest, count)
+    for i := range digests {
+        h := sha256.New()
+        if err := binary.Write(h, binary.BigEndian, r.Int63()); err != nil {
+            return nil, err
+        }
+        digests[i] = digest.NewDigest("sha256", h)
+    }
+    return digests, nil
+}
+
+func benchAddNTable(b *testing.B, n int) {
+    digests, err := createDigests(n)
+    if err != nil {
+        b.Fatal(err)
+    }
+    b.ResetTimer()
+    for i := 0; i < b.N; i++ {
+        dset := &Set{entries: digestEntries(make([]*digestEntry, 0, n))}
+        for j := range digests {
+            if err = dset.Add(digests[j]); err != nil {
+                b.Fatal(err)
+            }
+        }
+    }
+}
+
+func benchLookupNTable(b *testing.B, n int, shortLen int) {
+    digests, err := createDigests(n)
+    if err != nil {
+        b.Fatal(err)
+    }
+    dset := &Set{entries: digestEntries(make([]*digestEntry, 0, n))}
+    for i := range digests {
+        if err := dset.Add(digests[i]); err != nil {
+            b.Fatal(err)
+        }
+    }
+    shorts := make([]string, 0, n)
+    for _, short := range ShortCodeTable(dset, shortLen) {
+        shorts = append(shorts, short)
+    }
+
+    b.ResetTimer()
+    for i := 0; i < b.N; i++ {
+        if _, err = dset.Lookup(shorts[i%n]); err != nil {
+            b.Fatal(err)
+        }
+    }
+}
+
+func benchRemoveNTable(b *testing.B, n int) {
+    digests, err := createDigests(n)
+    if err != nil {
+        b.Fatal(err)
+    }
+    b.ResetTimer()
+    for i := 0; i < b.N; i++ {
+        dset := &Set{entries: digestEntries(make([]*digestEntry, 0, n))}
+        b.StopTimer()
+        for j := range digests {
+            if err = dset.Add(digests[j]); err != nil {
+                b.Fatal(err)
+            }
+        }
+        b.StartTimer()
+        for j := range digests {
+            if err = dset.Remove(digests[j]); err != nil {
+                b.Fatal(err)
+            }
+        }
+    }
+}
+
+func benchShortCodeNTable(b *testing.B, n int, shortLen int) {
+    digests, err := createDigests(n)
+    if err != nil {
+        b.Fatal(err)
+    }
+    dset := &Set{entries: digestEntries(make([]*digestEntry, 0, n))}
+    for i := range digests {
+        if err := dset.Add(digests[i]); err != nil {
+            b.Fatal(err)
+        }
+    }
+
+    b.ResetTimer()
+    for i := 0; i < b.N; i++ {
+        ShortCodeTable(dset, shortLen)
+    }
+}
+
+func BenchmarkAdd10(b *testing.B) {
+    benchAddNTable(b, 10)
+}
+
+func BenchmarkAdd100(b *testing.B) {
+    benchAddNTable(b, 100)
+}
+
+func BenchmarkAdd1000(b *testing.B) {
+    benchAddNTable(b, 1000)
+}
+
+func BenchmarkRemove10(b *testing.B) {
+    benchRemoveNTable(b, 10)
+}
+
+func BenchmarkRemove100(b *testing.B) {
+    benchRemoveNTable(b, 100)
+}
+
+func BenchmarkRemove1000(b *testing.B) {
+    benchRemoveNTable(b, 1000)
+}
+
+func BenchmarkLookup10(b *testing.B) {
+    benchLookupNTable(b, 10, 12)
+}
+
+func BenchmarkLookup100(b *testing.B) {
+    benchLookupNTable(b, 100, 12)
+}
+
+func BenchmarkLookup1000(b *testing.B) {
+    benchLookupNTable(b, 1000, 12)
+}
+
+func BenchmarkShortCode10(b *testing.B) {
+    benchShortCodeNTable(b, 10, 12)
+}
+func BenchmarkShortCode100(b *testing.B) {
+    benchShortCodeNTable(b, 100, 12)
+}
+func BenchmarkShortCode1000(b *testing.B) {
+    benchShortCodeNTable(b, 1000, 12)
+}
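Also for reference and not part of the patch: a small sketch of ShortCodeTable on the same assumed import path, using digest.FromString so the input digests do not need to be written out by hand; the layer names are made up. The benchmarks above can be run with the standard Go tooling, for example: go test -bench=. ./digestset

    package main

    import (
        _ "crypto/sha256" // register sha256, mirroring the test imports above

        "fmt"

        digest "github.com/opencontainers/go-digest"
        "github.com/opencontainers/go-digest/digestset"
    )

    func main() {
        // Hypothetical content; in practice the digests would come from an
        // image store or similar.
        dset := digestset.NewSet()
        for _, s := range []string{"layer one", "layer two", "layer three"} {
            if err := dset.Add(digest.FromString(s)); err != nil {
                panic(err)
            }
        }

        // Print the shortest unique prefix, at least 12 characters long,
        // next to each full digest.
        for dgst, short := range digestset.ShortCodeTable(dset, 12) {
            fmt.Printf("%-16s %s\n", short, dgst)
        }
    }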