Added Position field to the Tokenizer (#128)
The Position gives a means to index into the Tokenizer's underlying byte slice.
This enables use cases where the caller plans to edit the JSON document and wants
to leverage the copy func to optimize data movement, or to copy the remaining
bytes when exiting the tokenizing loop early.
Steve van Loben Sels authored Nov 2, 2022
1 parent 3391c4a commit b2d0aeb
Showing 2 changed files with 38 additions and 13 deletions.
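
To illustrate the early-exit use case the commit message describes, here is a minimal sketch (not part of the commit). It assumes the package lives at github.com/segmentio/encoding/json; the import path is not shown in this diff, and only NewTokenizer, Next, and the new Position field are relied on.

package main

import (
	"fmt"

	"github.com/segmentio/encoding/json" // assumed import path, not shown in this commit
)

func main() {
	src := []byte(`{"name":"alice","age":30,"tags":["a","b"]}`)

	tok := json.NewTokenizer(src)
	for n := 0; tok.Next(); n++ {
		if n == 3 {
			break // exit the tokenizing loop early
		}
	}

	// Position is the index just past the last token consumed, so
	// src[tok.Position:] holds the bytes the loop never looked at.
	rest := make([]byte, len(src)-tok.Position)
	copy(rest, src[tok.Position:])
	fmt.Printf("stopped at byte %d, remaining bytes: %s\n", tok.Position, rest)
}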
18 changes: 17 additions & 1 deletion json/token.go
@@ -31,7 +31,6 @@ import (
// ...
// }
// }
//
type Tokenizer struct {
// When the tokenizer is positioned on a json delimiter this field is not
// zero. In this case the possible values are '{', '}', '[', ']', ':', and
@@ -44,6 +43,17 @@ type Tokenizer struct {
// null, true, false, numbers, or quoted strings.
Value RawValue

// Position is the Tokenizer's current index into the underlying byte slice.
// Since the Tokenizer has already been advanced by calling Next, this
// position will be the first index of the next token. The position of
// the current Value can be calculated by subtracting len(token.Value).
// Accordingly, slicing the underlying bytes like:
//
// b[token.Position-len(token.Value):token.Position]
//
// will yield the current Value.
Position int

// When the tokenizer has encountered invalid content this field is not nil.
Err error

@@ -92,6 +102,7 @@ func (t *Tokenizer) Reset(b []byte) {
// However, it does not compile down to an invocation of duff-copy.
t.Delim = 0
t.Value = nil
t.Position = 0
t.Err = nil
t.Depth = 0
t.Index = 0
@@ -128,13 +139,16 @@ skipLoop:

if i > 0 {
t.json = t.json[i:]
t.Position += i
}

if len(t.json) == 0 {
t.Reset(nil)
return false
}

lenBefore := len(t.json)

var kind Kind
switch t.json[0] {
case '"':
@@ -165,6 +179,8 @@
t.Value, t.json, t.Err = t.json[:1], t.json[1:], syntaxError(t.json, "expected token but found '%c'", t.json[0])
}

t.Position += lenBefore - len(t.json)

t.Depth = t.depth()
t.Index = t.index()
t.flags = t.flags.withKind(kind)
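A small fragment spelling out the slicing relationship the new doc comment describes (same assumed import path as the sketch above); the updated tokenize helper in json/token_test.go below performs the same check for every token.

b := []byte(`{"answer":42}`)
tok := json.NewTokenizer(b)
for tok.Next() {
	start, end := tok.Position-len(tok.Value), tok.Position
	fmt.Printf("b[%d:%d] = %q, Value = %q\n", start, end, b[start:end], tok.Value)
}
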
33 changes: 21 additions & 12 deletions json/token_test.go
@@ -1,6 +1,7 @@
package json

import (
"bytes"
"reflect"
"testing"
)
@@ -40,22 +41,30 @@ func value(v string, depth, index int) token {
}
}

func tokenize(b []byte) (tokens []token) {
t := NewTokenizer(b)
func tokenize(t *testing.T, b []byte) (tokens []token) {
tok := NewTokenizer(b)

for tok.Next() {
start, end := tok.Position-len(tok.Value), tok.Position
if end > len(b) {
t.Fatalf("token position too far [%d:%d], len(b) is %d", start, end, len(b))
}
if !bytes.Equal(b[start:end], tok.Value) {
t.Fatalf("token position is wrong [%d:%d]", start, end)
}

for t.Next() {
tokens = append(tokens, token{
delim: t.Delim,
value: t.Value,
err: t.Err,
depth: t.Depth,
index: t.Index,
isKey: t.IsKey,
delim: tok.Delim,
value: tok.Value,
err: tok.Err,
depth: tok.Depth,
index: tok.Index,
isKey: tok.IsKey,
})
}

if t.Err != nil {
panic(t.Err)
if tok.Err != nil {
t.Fatal(tok.Err)
}

return
@@ -174,7 +183,7 @@ func TestTokenizer(t *testing.T) {

for _, test := range tests {
t.Run(string(test.input), func(t *testing.T) {
tokens := tokenize(test.input)
tokens := tokenize(t, test.input)

if !reflect.DeepEqual(tokens, test.tokens) {
t.Error("tokens mismatch")
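The other use case the commit message mentions, editing the document while moving unchanged bytes with copy, could look roughly like the fragment below. This is a hedged sketch, not code from the commit; it relies only on the NewTokenizer, Next, Value, and Position identifiers shown in the diff.

src := []byte(`{"count":1,"ok":true}`)
tok := json.NewTokenizer(src)

for tok.Next() {
	if string(tok.Value) == "1" { // the number value we want to rewrite
		start, end := tok.Position-len(tok.Value), tok.Position
		replacement := []byte("2")

		// Splice in the replacement, bulk-copying the untouched bytes around it.
		edited := make([]byte, len(src)-len(tok.Value)+len(replacement))
		n := copy(edited, src[:start])
		n += copy(edited[n:], replacement)
		copy(edited[n:], src[end:])

		fmt.Printf("%s\n", edited) // {"count":2,"ok":true}
		break
	}
}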
