-
Notifications
You must be signed in to change notification settings - Fork 5
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
3 changed files
with
598 additions
and
56 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,236 @@ | ||
package stringutil | ||
|
||
import ( | ||
"fmt" | ||
"sort" | ||
"unicode/utf8" | ||
) | ||
|
||
// terminationCharacter marks the end of a key. makeRecords appends it to every
// key before insertion, and Lookup probes for it to decide whether the prefix
// walked so far is a complete key.
const terminationCharacter = '#'
|
||
func mustDoubleArray(da *doubleArray, err error) *doubleArray { | ||
if err != nil { | ||
panic(err) | ||
} | ||
return da | ||
} | ||
|
||
func (da *doubleArray) Build(keys []string) error { | ||
records := makeRecords(keys) | ||
if err := da.build(records, 1, 0, make(map[int]struct{})); err != nil { | ||
return err | ||
} | ||
return nil | ||
} | ||
|
||
type doubleArray struct { | ||
bc []baseCheck | ||
node []int | ||
} | ||
|
||
func newDoubleArray(keys []string) (*doubleArray, error) { | ||
da := &doubleArray{ | ||
bc: []baseCheck{0}, | ||
node: []int{-1}, // A start index is adjusting to 1 because 0 will be used as a mark of non-existent node. | ||
} | ||
if err := da.Build(keys); err != nil { | ||
return nil, err | ||
} | ||
return da, nil | ||
} | ||
|
||
// baseCheck packs the BASE, Extra flags and CHECK fields of one double-array
// slot into a single 32-bit word.
// From the top: 22 bits of BASE, 2 bits of Extra flags and 8 bits of CHECK.
//
//	 BASE (22bit)          | Extra flags (2bit) | CHECK (8bit)
//	|----------------------|--|--------|
//	32                    10  8        0
type baseCheck uint32

// Base returns the BASE field (the upper 22 bits).
func (bc baseCheck) Base() int {
	return int(bc >> 10)
}

// SetBase replaces the BASE field with base, leaving the Extra flags and
// CHECK untouched. Only the low 22 bits of base fit in the field; higher
// bits are shifted out of the 32-bit word and lost.
func (bc *baseCheck) SetBase(base int) {
	// Clear the old BASE first: a plain OR would merge the new value with
	// whatever was stored before instead of overwriting it.
	*bc = (*bc & 0x3ff) | (baseCheck(base) << 10)
}

// Check returns the CHECK field (the low 8 bits).
func (bc baseCheck) Check() byte {
	return byte(bc)
}

// SetCheck replaces the CHECK field with check, leaving BASE and the Extra
// flags untouched.
func (bc *baseCheck) SetCheck(check byte) {
	// Clear the old CHECK first so that a second call overwrites rather
	// than ORs the two values together.
	*bc = (*bc &^ 0xff) | baseCheck(check)
}

// IsEmpty reports whether both BASE and CHECK are zero. The two Extra flag
// bits (bits 8 and 9) are masked out and do not affect the result.
func (bc baseCheck) IsEmpty() bool {
	return bc&0xfffffcff == 0
}
|
||
func (da *doubleArray) Lookup(path string) (length int) { | ||
idx := 1 | ||
tmpIdx := idx | ||
for i := 0; i < len(path); i++ { | ||
c := path[i] | ||
tmpIdx = da.nextIndex(da.bc[tmpIdx].Base(), c) | ||
if tmpIdx >= len(da.bc) || da.bc[tmpIdx].Check() != c { | ||
break | ||
} | ||
idx = tmpIdx | ||
} | ||
if next := da.nextIndex(da.bc[idx].Base(), terminationCharacter); next < len(da.bc) && da.bc[next].Check() == terminationCharacter { | ||
return da.node[da.bc[next].Base()] | ||
} | ||
return -1 | ||
} | ||
|
||
func (da *doubleArray) build(srcs []record, idx, depth int, usedBase map[int]struct{}) error { | ||
sort.Stable(recordSlice(srcs)) | ||
base, siblings, leaf, err := da.arrange(srcs, idx, depth, usedBase) | ||
if err != nil { | ||
return err | ||
} | ||
if leaf != nil { | ||
da.bc[idx].SetBase(len(da.node)) | ||
da.node = append(da.node, leaf.value) | ||
} | ||
for _, sib := range siblings { | ||
da.setCheck(da.nextIndex(base, sib.c), sib.c) | ||
} | ||
for _, sib := range siblings { | ||
if err := da.build(srcs[sib.start:sib.end], da.nextIndex(base, sib.c), depth+1, usedBase); err != nil { | ||
return err | ||
} | ||
} | ||
return nil | ||
} | ||
|
||
func (da *doubleArray) setBase(i, base int) { | ||
da.bc[i].SetBase(base) | ||
} | ||
|
||
func (da *doubleArray) setCheck(i int, check byte) { | ||
da.bc[i].SetCheck(check) | ||
} | ||
|
||
func (da *doubleArray) findEmptyIndex(start int) int { | ||
i := start | ||
for ; i < len(da.bc); i++ { | ||
if da.bc[i].IsEmpty() { | ||
break | ||
} | ||
} | ||
return i | ||
} | ||
|
||
// findBase returns good BASE. | ||
func (da *doubleArray) findBase(siblings []sibling, start int, usedBase map[int]struct{}) (base int) { | ||
for idx, firstChar := start+1, siblings[0].c; ; idx = da.findEmptyIndex(idx + 1) { | ||
base = da.nextIndex(idx, firstChar) | ||
if _, used := usedBase[base]; used { | ||
continue | ||
} | ||
i := 0 | ||
for ; i < len(siblings); i++ { | ||
next := da.nextIndex(base, siblings[i].c) | ||
if len(da.bc) <= next { | ||
da.bc = append(da.bc, make([]baseCheck, next-len(da.bc)+1)...) | ||
} | ||
if !da.bc[next].IsEmpty() { | ||
break | ||
} | ||
} | ||
if i == len(siblings) { | ||
break | ||
} | ||
} | ||
usedBase[base] = struct{}{} | ||
return base | ||
} | ||
|
||
func (da *doubleArray) arrange(records []record, idx, depth int, usedBase map[int]struct{}) (base int, siblings []sibling, leaf *record, err error) { | ||
siblings, leaf, err = makeSiblings(records, depth) | ||
if err != nil { | ||
return -1, nil, nil, err | ||
} | ||
if len(siblings) < 1 { | ||
return -1, nil, leaf, nil | ||
} | ||
base = da.findBase(siblings, idx, usedBase) | ||
da.setBase(idx, base) | ||
return base, siblings, leaf, err | ||
} | ||
|
||
// sibling describes one group of records that share the same byte at the
// depth currently being processed.
type sibling struct {
	// start and end delimit the group as the half-open range
	// records[start:end] of the slice passed to makeSiblings.
	start int
	end   int

	// c is the byte shared by every record in the group.
	c byte
}
|
||
func (da *doubleArray) nextIndex(base int, c byte) int { | ||
return base ^ int(c) | ||
} | ||
|
||
func makeSiblings(records []record, depth int) (sib []sibling, leaf *record, err error) { | ||
var ( | ||
pc byte | ||
n int | ||
) | ||
for i, r := range records { | ||
if len(r.key) <= depth { | ||
leaf = &r | ||
continue | ||
} | ||
c := r.key[depth] | ||
switch { | ||
case pc < c: | ||
sib = append(sib, sibling{start: i, c: c}) | ||
case pc == c: | ||
continue | ||
default: | ||
return nil, nil, fmt.Errorf("stringutil: BUG: records hasn't been sorted") | ||
} | ||
if n > 0 { | ||
sib[n-1].end = i | ||
} | ||
pc = c | ||
n++ | ||
} | ||
if n == 0 { | ||
return nil, leaf, nil | ||
} | ||
sib[n-1].end = len(records) | ||
return sib, leaf, nil | ||
} | ||
|
||
// record is one entry to be inserted into the double array.
type record struct {
	// key is the byte string walked while building the trie; makeRecords
	// sets it to the source string followed by the termination character.
	key string

	// value is the payload stored for the key; makeRecords sets it to the
	// number of runes in the source string.
	value int
}
|
||
func makeRecords(srcs []string) (records []record) { | ||
termChar := string(terminationCharacter) | ||
for _, s := range srcs { | ||
records = append(records, record{ | ||
key: string(s + termChar), | ||
value: utf8.RuneCountInString(s), | ||
}) | ||
} | ||
return records | ||
} | ||
|
||
type recordSlice []record | ||
|
||
func (rs recordSlice) Len() int { | ||
return len(rs) | ||
} | ||
|
||
func (rs recordSlice) Less(i, j int) bool { | ||
return rs[i].key < rs[j].key | ||
} | ||
|
||
func (rs recordSlice) Swap(i, j int) { | ||
rs[i], rs[j] = rs[j], rs[i] | ||
} |
Oops, something went wrong.