Skip to content

Commit

Permalink
Merge pull request #144 from go-ego/range-pr
Browse files Browse the repository at this point in the history
add more stop word functions and test code
  • Loading branch information
vcaesar authored Mar 1, 2022
2 parents 794c2ce + 99005b7 commit 769e88e
Show file tree
Hide file tree
Showing 3 changed files with 27 additions and 1 deletion.
8 changes: 7 additions & 1 deletion gse_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -266,10 +266,16 @@ func TestStop(t *testing.T) {
s = FilterHtml(t2)
tt.Equal(t, "test: bot 机器人 <<银河系漫游指南>> ", s)

prodSeg.AddStop(`"`)
prodSeg.AddStopArr("class", "div", "=")
tt.True(t, prodSeg.IsStop("="))
s1 := prodSeg.CutStop(t2, false)
tt.Equal(t, "[p test : p bot bot 机器人 银河系 漫游 指南]", s1)

s = prodSeg.CutTrimHtmls(t2, true)
tt.Equal(t, "test bot 机器人 银河系 漫游 指南", s)

s1 := Range("hibot, 机器人")
s1 = Range("hibot, 机器人")
tt.Equal(t, "[h i b o t , 机 器 人]", s1)
s = RangeText("hibot, 机器人")
tt.Equal(t, "h i b o t , 机 器 人 ", s)
Expand Down
5 changes: 5 additions & 0 deletions stop.go
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,11 @@ func (seg *Segmenter) AddStop(text string) {
seg.StopWordMap[text] = true
}

// AddStopArr adds several tokens to the stop word dictionary in one call.
// The tokens are accepted variadically and forwarded, as a single slice,
// to LoadStopArr, which performs the actual registration.
func (seg *Segmenter) AddStopArr(text ...string) {
seg.LoadStopArr(text)
}

// RemoveStop remove a token from the StopWord dictionary.
func (seg *Segmenter) RemoveStop(text string) {
delete(seg.StopWordMap, text)
Expand Down
15 changes: 15 additions & 0 deletions trim.go
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,16 @@ func (seg *Segmenter) TrimWithPos(se []SegPos, pos ...string) (re []SegPos) {
return
}

// Stop returns the tokens of s that are neither stop words (according to
// seg.IsStop) nor empty strings, in their original order. The input slice
// is not modified. When no token survives, the result is nil.
func (seg *Segmenter) Stop(s []string) (r []string) {
	for i := 0; i < len(s); i++ {
		word := s[i]
		// Drop stop words and empty tokens; everything else is kept.
		if seg.IsStop(word) || word == "" {
			continue
		}
		r = append(r, word)
	}
	return r
}

// Trim trim []string exclude symbol, space and punct
func (seg *Segmenter) Trim(s []string) (r []string) {
for i := 0; i < len(s); i++ {
Expand Down Expand Up @@ -128,6 +138,11 @@ func (seg *Segmenter) TrimPos(s []SegPos) (r []SegPos) {
return
}

// CutStop segments str with Cut, forwarding the optional hmm flags
// unchanged, and then passes the resulting tokens through Stop so that
// stop words and empty tokens are removed from the returned slice.
func (seg *Segmenter) CutStop(str string, hmm ...bool) []string {
	words := seg.Cut(str, hmm...)
	return seg.Stop(words)
}

// CutTrim cut string and trim
func (seg *Segmenter) CutTrim(str string, hmm ...bool) []string {
s := seg.Cut(str, hmm...)
Expand Down

0 comments on commit 769e88e

Please sign in to comment.