Skip to content

Commit

Permalink
merge PR #58, supports matches XPath function. close #57
Browse files Browse the repository at this point in the history
  • Loading branch information
zhengchun committed Nov 22, 2020
2 parents a5d9242 + e66c45d commit cd2afb8
Show file tree
Hide file tree
Showing 7 changed files with 353 additions and 13 deletions.
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -138,6 +138,7 @@ Supported Features
`lang()`| ✗ |
`last()`| ✓ |
`local-name()`| ✓ |
`matches()`| ✓ |
`name()`| ✓ |
`namespace-uri()`| ✓ |
`normalize-space()`| ✓ |
Expand Down
51 changes: 51 additions & 0 deletions assert_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
package xpath

import (
"reflect"
"testing"
)

// assertEqual fails the test immediately unless v1 and v2 are deeply equal
// according to reflect.DeepEqual.
func assertEqual(tb testing.TB, v1, v2 interface{}) {
	if reflect.DeepEqual(v1, v2) {
		return
	}
	tb.Fatalf("'%+v' and '%+v' are not equal", v1, v2)
}

// assertNoErr fails the test immediately when err is non-nil, reporting the
// error's message.
func assertNoErr(tb testing.TB, err error) {
	if err == nil {
		return
	}
	tb.Fatalf("expected no err, but got: %s", err.Error())
}

// assertErr fails the test immediately when err is nil, i.e. when an error
// was expected but none was produced.
func assertErr(tb testing.TB, err error) {
	if err != nil {
		return
	}
	tb.Fatal("expected err, but got nil")
}

// assertTrue fails the test immediately when v is false.
func assertTrue(tb testing.TB, v bool) {
	if v {
		return
	}
	tb.Fatal("expected true, but got false")
}

// assertFalse fails the test immediately when v is true.
func assertFalse(tb testing.TB, v bool) {
	if !v {
		return
	}
	tb.Fatal("expected false, but got true")
}

// assertNil fails the test immediately unless v is nil. Both an untyped nil
// and a typed nil held in the interface (nil pointer, map, slice, chan, func,
// interface or unsafe pointer) count as nil. Any value of a kind that cannot
// be nil (int, string, struct, ...) is reported as a failure.
func assertNil(tb testing.TB, v interface{}) {
	if v == nil {
		return
	}
	// reflect.Value.IsNil panics for kinds that cannot be nil, so only call it
	// for the kinds where it is defined.
	switch rv := reflect.ValueOf(v); rv.Kind() {
	case reflect.Chan, reflect.Func, reflect.Interface, reflect.Map,
		reflect.Ptr, reflect.Slice, reflect.UnsafePointer:
		if rv.IsNil() {
			return
		}
	}
	tb.Fatalf("expected nil, but got: %+v", v)
}

// assertPanic runs f and marks the test as failed (without stopping it) if f
// returns normally instead of panicking.
func assertPanic(t *testing.T, f func()) {
	checkRecovered := func() {
		// recover must be called directly by the deferred function.
		if recovered := recover(); recovered == nil {
			t.Errorf("The code did not panic")
		}
	}
	defer checkRecovered()
	f()
}
17 changes: 16 additions & 1 deletion build.go
Original file line number Diff line number Diff line change
Expand Up @@ -193,8 +193,23 @@ func (b *builder) processFunctionNode(root *functionNode) (query, error) {
if err != nil {
return nil, err
}

qyOutput = &functionQuery{Input: b.firstInput, Func: containsFunc(arg1, arg2)}
case "matches":
//matches(string , pattern)
if len(root.Args) != 2 {
return nil, errors.New("xpath: matches function must have two parameters")
}
var (
arg1, arg2 query
err error
)
if arg1, err = b.processNode(root.Args[0]); err != nil {
return nil, err
}
if arg2, err = b.processNode(root.Args[1]); err != nil {
return nil, err
}
qyOutput = &functionQuery{Input: b.firstInput, Func: matchesFunc(arg1, arg2)}
case "substring":
//substring( string , start [, length] )
if len(root.Args) < 2 {
Expand Down
80 changes: 80 additions & 0 deletions cache.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
package xpath

import (
"regexp"
"sync"
)

// loadFunc produces the value to be cached for key. When it returns an error
// nothing is cached and the error is handed back to the caller of get.
type loadFunc func(key interface{}) (interface{}, error)

const (
	// defaultCap is the capacity used for the package-level regexp cache.
	defaultCap = 65536
)

// loadingCache is a simple loading cache that is reset wholesale once its
// capacity is reached.
//
// We build this instead of using something like github.com/hashicorp/golang-lru
// primarily to avoid an external dependency. Currently this library has zero
// external dependencies (other than the Go SDK) and supports go 1.6, 1.9 and
// 1.10 (and later); pulling in external libraries (plus their transitive
// dependencies) would make that hard if not impossible.
//
// We expect that, under most circumstances, defaultCap is big enough for any
// long-running service using this library as long as its xpath regexp
// cardinality is low. In the extreme case where the capacity is reached, the
// cache is simply reset, trading a small subsequent performance hit (next to
// nothing once amortized) for not having a more complex and less performant
// LRU-style construct.
type loadingCache struct {
	sync.RWMutex
	cap   int                         // maximum number of entries; 0 means unbounded
	load  loadFunc                    // computes the value for a missing key
	m     map[interface{}]interface{} // cached key -> value entries
	reset int                         // number of times the cache has been reset
}

// NewLoadingCache creates a new instance of a loading cache with capacity.
// Capacity must be >= 0, or it will panic. Capacity == 0 means the cache
// growth is unbounded.
func NewLoadingCache(load loadFunc, capacity int) *loadingCache {
	if capacity < 0 {
		panic("capacity must be >= 0")
	}
	return &loadingCache{cap: capacity, load: load, m: make(map[interface{}]interface{})}
}

// get returns the cached value for key, calling the load function and caching
// its result on a miss. A load error is returned as-is and nothing is cached.
//
// The load function runs without any lock held, so several goroutines may load
// the same key at once. The first one to store its value wins; the others
// return that stored value, so callers share one instance per key and a racing
// duplicate load never triggers a spurious reset of a full cache.
func (c *loadingCache) get(key interface{}) (interface{}, error) {
	c.RLock()
	v, found := c.m[key]
	c.RUnlock()
	if found {
		return v, nil
	}

	v, err := c.load(key)
	if err != nil {
		return nil, err
	}

	c.Lock()
	defer c.Unlock()
	// Re-check under the write lock: another loader may have stored the key
	// while this one was running.
	if existing, ok := c.m[key]; ok {
		return existing, nil
	}
	if c.cap > 0 && len(c.m) >= c.cap {
		// Capacity reached: drop everything and start over with this entry.
		c.m = map[interface{}]interface{}{key: v}
		c.reset++
		return v, nil
	}
	c.m[key] = v
	return v, nil
}

var (
	// RegexpCache is a loading cache that maps a pattern string to its compiled
	// *regexp.Regexp. It is exported so that, in rare cases, a client can
	// replace it with a cache that uses a custom load func and/or capacity.
	RegexpCache = defaultRegexpCache()
)

// defaultRegexpCache builds a loading cache with capacity defaultCap whose
// load function compiles the key, which must be a string, with regexp.Compile.
func defaultRegexpCache() *loadingCache {
	compile := func(key interface{}) (interface{}, error) {
		return regexp.Compile(key.(string))
	}
	return NewLoadingCache(compile, defaultCap)
}

// getRegexp returns the compiled form of pattern, looked up through
// RegexpCache. A failure to load the pattern is returned as the error with a
// nil *regexp.Regexp.
func getRegexp(pattern string) (*regexp.Regexp, error) {
	cached, err := RegexpCache.get(pattern)
	if err == nil {
		return cached.(*regexp.Regexp), nil
	}
	return nil, err
}
166 changes: 166 additions & 0 deletions cache_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,166 @@
package xpath

import (
"errors"
"fmt"
"math/rand"
"strconv"
"sync"
"testing"
)

// TestLoadingCache covers the basic loadingCache contract: values are loaded
// once and served from the map afterwards, the map is reset when the capacity
// is exceeded, a negative capacity panics, and load errors reach the caller.
func TestLoadingCache(t *testing.T) {
	itoaLoader := func(key interface{}) (interface{}, error) {
		if n, isInt := key.(int); isInt {
			return strconv.Itoa(n), nil
		}
		return nil, errors.New("invalid type")
	}
	cache := NewLoadingCache(itoaLoader, 2) // cap = 2
	assertEqual(t, 0, len(cache.m))

	steps := []struct {
		key  int
		want string
		size int
	}{
		{key: 1, want: "1", size: 1}, // first load
		{key: 1, want: "1", size: 1}, // served from the cache
		{key: 2, want: "2", size: 2}, // fills the cache
		{key: 3, want: "3", size: 1}, // over capacity, m is reset
	}
	for _, step := range steps {
		got, err := cache.get(step.key)
		assertNoErr(t, err)
		assertEqual(t, step.want, got)
		assertEqual(t, step.size, len(cache.m))
	}

	// Invalid capacity
	passThrough := func(key interface{}) (interface{}, error) { return key, nil }
	assertPanic(t, func() {
		NewLoadingCache(passThrough, -1)
	})

	// Loading failure
	evenOnly := func(key interface{}) (interface{}, error) {
		if n := key.(int); n%2 != 0 {
			return nil, fmt.Errorf("artificial error: %d", n)
		}
		return key, nil
	}
	cache = NewLoadingCache(evenOnly, 0)
	got, err := cache.get(12)
	assertNoErr(t, err)
	assertEqual(t, 12, got)
	_, err = cache.get(21)
	assertErr(t, err)
	assertEqual(t, "artificial error: 21", err.Error())
}

// Shared parameters for the loading cache benchmarks.
const (
	// benchLoadingCacheRandSeed is the fixed seed passed to rand.Seed by each benchmark.
	benchLoadingCacheRandSeed = 12345
	// benchLoadingCacheConcurrency is the number of goroutines started by the multi-thread benchmarks.
	benchLoadingCacheConcurrency = 5
	// benchLoadingCacheKeyRange is the exclusive upper bound of the random keys requested.
	benchLoadingCacheKeyRange = 2000
	// benchLoadingCacheCap is the cache capacity used by the capped benchmarks.
	benchLoadingCacheCap = 1000
)

// BenchmarkLoadingCacheCapped_SingleThread measures get on a capped cache from
// a single goroutine, using random keys drawn from a range larger than the
// capacity, and logs how many times the cache was reset.
func BenchmarkLoadingCacheCapped_SingleThread(b *testing.B) {
	rand.Seed(benchLoadingCacheRandSeed)
	identity := func(key interface{}) (interface{}, error) {
		return key, nil
	}
	cache := NewLoadingCache(identity, benchLoadingCacheCap)
	for remaining := b.N; remaining > 0; remaining-- {
		key := rand.Intn(benchLoadingCacheKeyRange)
		if got, _ := cache.get(key); got != key {
			b.FailNow()
		}
	}
	b.Logf("N=%d, reset=%d", b.N, cache.reset)
}

// BenchmarkLoadingCacheCapped_MultiThread measures get on a capped cache that
// is hit concurrently by benchLoadingCacheConcurrency goroutines, each doing
// b.N lookups of random keys, and logs how many times the cache was reset.
func BenchmarkLoadingCacheCapped_MultiThread(b *testing.B) {
	rand.Seed(benchLoadingCacheRandSeed)
	c := NewLoadingCache(
		func(key interface{}) (interface{}, error) {
			return key, nil
		}, benchLoadingCacheCap)
	var wg sync.WaitGroup
	wg.Add(benchLoadingCacheConcurrency)
	for i := 0; i < benchLoadingCacheConcurrency; i++ {
		go func() {
			// Register Done first so wg.Wait cannot hang on an early exit.
			defer wg.Done()
			for j := 0; j < b.N; j++ {
				k := rand.Intn(benchLoadingCacheKeyRange)
				if v, _ := c.get(k); k != v {
					// FailNow may only be called from the goroutine running
					// the benchmark; report with Errorf and stop this worker.
					b.Errorf("cache returned %v for key %d", v, k)
					return
				}
			}
		}()
	}
	wg.Wait()
	b.Logf("N=%d, concurrency=%d, reset=%d", b.N, benchLoadingCacheConcurrency, c.reset)
}

// BenchmarkLoadingCacheNoCap_SingleThread measures get on an unbounded cache
// (capacity 0) from a single goroutine using random keys, and logs the reset
// count.
func BenchmarkLoadingCacheNoCap_SingleThread(b *testing.B) {
	rand.Seed(benchLoadingCacheRandSeed)
	identity := func(key interface{}) (interface{}, error) {
		return key, nil
	}
	cache := NewLoadingCache(identity, 0) // 0 => no cap
	for remaining := b.N; remaining > 0; remaining-- {
		key := rand.Intn(benchLoadingCacheKeyRange)
		if got, _ := cache.get(key); got != key {
			b.FailNow()
		}
	}
	b.Logf("N=%d, reset=%d", b.N, cache.reset)
}

// BenchmarkLoadingCacheNoCap_MultiThread measures get on an unbounded cache
// (capacity 0) that is hit concurrently by benchLoadingCacheConcurrency
// goroutines, each doing b.N lookups of random keys, and logs the reset count.
func BenchmarkLoadingCacheNoCap_MultiThread(b *testing.B) {
	rand.Seed(benchLoadingCacheRandSeed)
	c := NewLoadingCache(
		func(key interface{}) (interface{}, error) {
			return key, nil
		}, 0) // 0 => no cap
	var wg sync.WaitGroup
	wg.Add(benchLoadingCacheConcurrency)
	for i := 0; i < benchLoadingCacheConcurrency; i++ {
		go func() {
			// Register Done first so wg.Wait cannot hang on an early exit.
			defer wg.Done()
			for j := 0; j < b.N; j++ {
				k := rand.Intn(benchLoadingCacheKeyRange)
				if v, _ := c.get(k); k != v {
					// FailNow may only be called from the goroutine running
					// the benchmark; report with Errorf and stop this worker.
					b.Errorf("cache returned %v for key %d", v, k)
					return
				}
			}
		}()
	}
	wg.Wait()
	b.Logf("N=%d, concurrency=%d, reset=%d", b.N, benchLoadingCacheConcurrency, c.reset)
}

// TestGetRegexp checks that getRegexp compiles valid patterns through a fresh
// default RegexpCache and reports invalid ones as an error with a nil regexp.
func TestGetRegexp(t *testing.T) {
	// Start from a pristine cache so the size/capacity checks are meaningful.
	RegexpCache = defaultRegexpCache()
	assertEqual(t, 0, len(RegexpCache.m))
	assertEqual(t, defaultCap, RegexpCache.cap)

	digits, err := getRegexp("^[0-9]{3,5}$")
	assertNoErr(t, err)
	assertTrue(t, digits.MatchString("3141"))
	assertFalse(t, digits.MatchString("3"))

	broken, err := getRegexp("[invalid")
	assertErr(t, err)
	assertEqual(t, "error parsing regexp: missing closing ]: `[invalid`", err.Error())
	assertNil(t, broken)
}
31 changes: 30 additions & 1 deletion func.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ import (
"unicode"
)

// Defined an interface of stringBuilder that compatible with
// Defined an interface of stringBuilder that compatible with
// strings.Builder(go 1.10) and bytes.Buffer(< go 1.10)
type stringBuilder interface {
WriteRune(r rune) (n int, err error)
Expand Down Expand Up @@ -354,6 +354,35 @@ func containsFunc(arg1, arg2 query) func(query, iterator) interface{} {
}
}

// matchesFunc is an XPath function that tests a given string against a regexp pattern.
// Note: does not support https://www.w3.org/TR/xpath-functions-31/#func-matches 3rd optional `flags` argument; if
// needed, directly put flags in the regexp pattern, such as `(?i)^pattern$` for `i` flag.
//
// arg1 yields the input: either a string or a query whose first selected node
// supplies the text. arg2 must evaluate to a string holding the pattern, which
// is compiled through the shared regexp cache. The returned function always
// yields a bool. It panics when the pattern is not a string or is not a valid
// regular expression.
func matchesFunc(arg1, arg2 query) func(query, iterator) interface{} {
	return func(q query, t iterator) interface{} {
		var s string
		switch typ := functionArgs(arg1).Evaluate(t).(type) {
		case string:
			s = typ
		case query:
			node := typ.Select(t)
			if node == nil {
				// No node selected: report "no match" as a bool so every path
				// of this boolean function has the same result type.
				// NOTE(review): the W3C spec treats an empty input as the
				// zero-length string; the existing falsy result is kept here.
				return false
			}
			s = node.Value()
		}
		var pattern string
		var ok bool
		if pattern, ok = functionArgs(arg2).Evaluate(t).(string); !ok {
			panic(errors.New("matches() function second argument type must be string"))
		}
		re, err := getRegexp(pattern)
		if err != nil {
			panic(fmt.Errorf("matches() function second argument is not a valid regexp pattern, err: %s", err.Error()))
		}
		return re.MatchString(s)
	}
}

// normalizespaceFunc is XPath functions normalize-space(string?)
func normalizespaceFunc(q query, t iterator) interface{} {
var m string
Expand Down
Loading

0 comments on commit cd2afb8

Please sign in to comment.