Skip to content

Commit

Permalink
Merge pull request #134 from go-ego/range-pr
Browse files Browse the repository at this point in the history
 optimize and export findAllOccs code, optimize analyze code
  • Loading branch information
vcaesar authored Oct 27, 2021
2 parents b38e5f6 + de79f86 commit 305682e
Show file tree
Hide file tree
Showing 2 changed files with 8 additions and 7 deletions.
13 changes: 7 additions & 6 deletions dag.go
Original file line number Diff line number Diff line change
Expand Up @@ -45,11 +45,12 @@ func (seg *Segmenter) Value(str string) (int, int, error) {
return seg.Dict.Value([]byte(str))
}

func findAllOccs(data []byte, searches []string) map[string][]int {
// FindAllOccs finds the start positions of all occurrences of each search string in data.
func FindAllOccs(data []byte, searches []string) map[string][]int {
results := make(map[string][]int, 0)
tmp := data
for _, search := range searches {
index := len(data)
tmp := data
for {
match := bytes.LastIndex(tmp[0:index], []byte(search))
if match == -1 {
Expand All @@ -72,7 +73,7 @@ func (seg *Segmenter) Analyze(text []string, t1 string, by ...bool) (az []Analyz

start, end := 0, 0
if t1 == "" {
if len(by) <= 0 {
if len(by) > 0 {
end = len([]rune(text[0]))
} else {
end = len([]byte(text[0]))
Expand All @@ -83,11 +84,11 @@ func (seg *Segmenter) Analyze(text []string, t1 string, by ...bool) (az []Analyz
if ToLower {
t1 = strings.ToLower(t1)
}
all := findAllOccs([]byte(t1), text)
all := FindAllOccs([]byte(t1), text)
for k, v := range text {
if k > 0 && t1 == "" {
start = az[k-1].End
if len(by) <= 0 {
if len(by) > 0 {
end = az[k-1].End + len([]rune(v))
} else {
end = az[k-1].End + len([]byte(v))
Expand All @@ -96,7 +97,7 @@ func (seg *Segmenter) Analyze(text []string, t1 string, by ...bool) (az []Analyz

if t1 != "" {
if _, ok := isEx[v]; ok {
isEx[v] = isEx[v] + 1
isEx[v]++
} else {
isEx[v] = 0
}
Expand Down
2 changes: 1 addition & 1 deletion gse_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ func TestAnalyze(t *testing.T) {
tt.Equal(t, 23, len(s))
tt.Equal(t, "[城市地标 建筑 : 纽约 帝国大厦 , 旧金山湾 金门大桥 , seattle space needle , toronto cn tower , 伦敦 大笨钟]", s)

a := prodSeg.Analyze(s, "")
a := prodSeg.Analyze(s, "", true)
tt.Equal(t, 23, len(a))
tt.Equal(t, "[{0 4 0 0 城市地标 3 j} {4 6 1 0 建筑 14397 n} {6 8 2 0 : 0 } {8 10 3 0 纽约 1758 ns} {10 14 4 0 帝国大厦 3 nr} {14 16 5 0 , 0 } {16 20 6 0 旧金山湾 3 ns} {20 24 7 0 金门大桥 38 nz} {24 26 8 0 , 0 } {26 33 9 0 seattle 0 } {33 34 10 0 0 } {34 39 11 0 space 0 } {39 40 12 0 0 } {40 46 13 0 needle 0 } {46 48 14 0 , 0 } {48 55 15 0 toronto 0 } {55 56 16 0 0 } {56 58 17 0 cn 0 } {58 59 18 0 0 } {59 64 19 0 tower 0 } {64 66 20 0 , 0 } {66 68 21 0 伦敦 2255 ns} {68 71 22 0 大笨钟 0 }]", a)

Expand Down

0 comments on commit 305682e

Please sign in to comment.