Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Optimize storage format #797

Merged
merged 16 commits into from
Apr 13, 2021
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
Optimize encoding of dictionaries
Switch from CBOR map to CBOR array to improve speed and preserve
ordering.

Remove key strings from encoding.

Remove old backwards-compatibility decoding code.

Add tests, including round-trip for decoding old format and encoding new
format.

Closes #743
  • Loading branch information
fxamacker authored and turbolent committed Apr 2, 2021
commit 792541a0a501b8d434c0fbf6703d3a1e3193071c
74 changes: 4 additions & 70 deletions runtime/interpreter/decode.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,6 @@ package interpreter

import (
"bytes"
"encoding/hex"
"errors"
"fmt"
"io"
Expand Down Expand Up @@ -305,7 +304,7 @@ func (d *DecoderV4) decodeArray(v []interface{}, path []string) (*ArrayValue, er
}

func (d *DecoderV4) decodeDictionary(v interface{}, path []string) (*DictionaryValue, error) {
encoded, ok := v.(map[interface{}]interface{})
encoded, ok := v.([]interface{})
if !ok {
return nil, fmt.Errorf(
"invalid dictionary encoding (@ %s): %T",
Expand Down Expand Up @@ -335,7 +334,7 @@ func (d *DecoderV4) decodeDictionary(v interface{}, path []string) (*DictionaryV
}

entriesField := encoded[encodedDictionaryValueEntriesFieldKey]
encodedEntries, ok := entriesField.(map[interface{}]interface{})
encodedEntries, ok := entriesField.([]interface{})
if !ok {
return nil, fmt.Errorf(
"invalid dictionary entries encoding (@ %s): %T",
Expand All @@ -347,57 +346,6 @@ func (d *DecoderV4) decodeDictionary(v interface{}, path []string) (*DictionaryV
keyCount := keys.Count()
entryCount := len(encodedEntries)

// In versions <= 2, the dictionary key string function
// was accidentally, temporarily changed without a version change.
//
// The key string format for address values is:
// prefix the address with 0x, encode in hex, and strip leading zeros.
//
// Temporarily and accidentally the format was:
// no 0x prefix, and encode and in full length hex.
//
// Detect this temporary format and correct it

var hasAddressValueKeyInWrongPre3Format bool

if d.version <= 2 {
for _, keyValue := range keys.Values {
keyAddressValue, ok := keyValue.(AddressValue)
if !ok {
continue
}

currentKeyString := keyAddressValue.KeyString()
wrongKeyString := hex.EncodeToString(keyAddressValue[:])

// Is there a value stored with the current format?
// Then no migration is necessary.

if encodedEntries[currentKeyString] != nil {
continue
}

// Is there at least a value stored in the wrong key string format?

if encodedEntries[wrongKeyString] == nil {

return nil, fmt.Errorf(
"invalid dictionary address value key: "+
"could neither find entry for wrong format key %s, nor for current format key %s",
wrongKeyString,
currentKeyString,
)
}

// Migrate the value from the wrong format to the current format

hasAddressValueKeyInWrongPre3Format = true

encodedEntries[currentKeyString] = encodedEntries[wrongKeyString]
delete(encodedEntries, wrongKeyString)
}
}

// The number of entries must either match the number of keys,
// or be zero in case the values are deferred

Expand All @@ -424,13 +372,6 @@ func (d *DecoderV4) decodeDictionary(v interface{}, path []string) (*DictionaryV

if isDeferred {

if hasAddressValueKeyInWrongPre3Format {
return nil, fmt.Errorf(
"invalid dictionary (@ %s): dictionary with address values in pre-3 format and deferred values",
strings.Join(path, "."),
)
}

deferred = orderedmap.NewStringStructOrderedMap()
deferredOwner = d.owner
deferredStorageKeyBase = joinPath(append(path[:], dictionaryValuePathPrefix))
Expand All @@ -455,15 +396,8 @@ func (d *DecoderV4) decodeDictionary(v interface{}, path []string) (*DictionaryV
}

keyString := keyStringValue.KeyString()
value, ok := encodedEntries[keyString]
if !ok {
return nil, fmt.Errorf(
"missing dictionary value for key (@ %s, %d): %s",
strings.Join(path, "."),
index,
keyString,
)
}

value := encodedEntries[index]

valuePath := append(path[:], dictionaryValuePathPrefix, keyString)
decodedValue, err := d.decodeValue(value, valuePath)
Expand Down
10 changes: 6 additions & 4 deletions runtime/interpreter/encode.go
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,8 @@ import (
"github.com/onflow/cadence/runtime/common"
)

type cborArray = []interface{}

type cborMap = map[uint64]interface{}

// Cadence needs to encode different kinds of objects in CBOR, for instance,
Expand Down Expand Up @@ -667,7 +669,6 @@ const (
const dictionaryKeyPathPrefix = "k"
const dictionaryValuePathPrefix = "v"

// TODO: optimize: use CBOR map, but unclear how to preserve ordering
func (e *Encoder) prepareDictionaryValue(
v *DictionaryValue,
path []string,
Expand All @@ -683,7 +684,8 @@ func (e *Encoder) prepareDictionaryValue(
return nil, err
}

entries := make(map[string]interface{}, v.Entries.Len())
// Use CBOR array for entry value to preserve ordering and improve speed.
entries := make([]interface{}, 0, v.Entries.Len())

// Deferring the encoding of values is only supported if all
// values are resources: resource typed dictionaries are moved
Expand Down Expand Up @@ -755,13 +757,13 @@ func (e *Encoder) prepareDictionaryValue(
if err != nil {
return nil, err
}
entries[key] = prepared
entries = append(entries, prepared)
}
}

return cbor.Tag{
Number: cborTagDictionaryValue,
Content: cborMap{
Content: cborArray{
encodedDictionaryValueKeysFieldKey: keys,
encodedDictionaryValueEntriesFieldKey: entries,
},
Expand Down
Loading