Skip to content

Commit

Permalink
Merge remote-tracking branch 'origin/master' into 7.6-couchbase
Browse files Browse the repository at this point in the history
  • Loading branch information
abhinavdangeti committed Feb 13, 2024
2 parents 8b9206a + 5f1f45a commit a0cb65a
Show file tree
Hide file tree
Showing 5 changed files with 188 additions and 15 deletions.
5 changes: 5 additions & 0 deletions analysis/lang/es/analyzer_es.go
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,10 @@ func AnalyzerConstructor(config map[string]interface{},
if err != nil {
return nil, err
}
normalizeEsFilter, err := cache.TokenFilterNamed(NormalizeName)
if err != nil {
return nil, err
}
stopEsFilter, err := cache.TokenFilterNamed(StopName)
if err != nil {
return nil, err
Expand All @@ -47,6 +51,7 @@ func AnalyzerConstructor(config map[string]interface{},
TokenFilters: []analysis.TokenFilter{
toLowerFilter,
stopEsFilter,
normalizeEsFilter,
lightStemmerEsFilter,
},
}
Expand Down
15 changes: 0 additions & 15 deletions analysis/lang/es/light_stemmer_es.go
Original file line number Diff line number Diff line change
Expand Up @@ -46,21 +46,6 @@ func stem(input []rune) []rune {
return input
}

for i, r := range input {
switch r {
case 'à', 'á', 'â', 'ä':
input[i] = 'a'
case 'ò', 'ó', 'ô', 'ö':
input[i] = 'o'
case 'è', 'é', 'ê', 'ë':
input[i] = 'e'
case 'ù', 'ú', 'û', 'ü':
input[i] = 'u'
case 'ì', 'í', 'î', 'ï':
input[i] = 'i'
}
}

switch input[l-1] {
case 'o', 'a', 'e':
return input[:l-1]
Expand Down
67 changes: 67 additions & 0 deletions analysis/lang/es/spanish_normalize.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
// Copyright (c) 2017 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package es

import (
"bytes"

"github.com/blevesearch/bleve/v2/analysis"
"github.com/blevesearch/bleve/v2/registry"
)

// NormalizeName is the name under which the Spanish normalize token filter
// is registered with the registry (see init in this file).
const NormalizeName = "normalize_es"

// SpanishNormalizeFilter is a token filter that rewrites each token's term,
// replacing accented lowercase vowels with their unaccented form. It carries
// no state.
type SpanishNormalizeFilter struct {
}

// NewSpanishNormalizeFilter returns a pointer to a new, zero-valued
// SpanishNormalizeFilter. The filter has no configuration.
func NewSpanishNormalizeFilter() *SpanishNormalizeFilter {
	return new(SpanishNormalizeFilter)
}

// Filter replaces the Term of every token in input with its normalized form
// (see normalize) and returns the same stream. Tokens are modified in place;
// no new stream is allocated.
func (s *SpanishNormalizeFilter) Filter(input analysis.TokenStream) analysis.TokenStream {
	for idx := range input {
		tok := input[idx]
		tok.Term = normalize(tok.Term)
	}
	return input
}

// normalize decodes input into runes, maps each lowercase vowel carrying a
// grave, acute, circumflex or diaeresis accent to the plain vowel, and
// re-encodes the result with analysis.BuildTermFromRunes.
//
// Every other rune is passed through unchanged; in particular the switch
// lists only lowercase forms, so uppercase accented letters and 'ñ' are left
// as they are.
func normalize(input []byte) []byte {
	decoded := bytes.Runes(input)
	for pos, r := range decoded {
		switch r {
		case 'à', 'á', 'â', 'ä':
			decoded[pos] = 'a'
		case 'è', 'é', 'ê', 'ë':
			decoded[pos] = 'e'
		case 'ì', 'í', 'î', 'ï':
			decoded[pos] = 'i'
		case 'ò', 'ó', 'ô', 'ö':
			decoded[pos] = 'o'
		case 'ù', 'ú', 'û', 'ü':
			decoded[pos] = 'u'
		}
	}

	return analysis.BuildTermFromRunes(decoded)
}

// NormalizerFilterConstructor is the registry constructor for the Spanish
// normalize filter. It ignores both config and cache and always returns a new
// SpanishNormalizeFilter with a nil error.
func NormalizerFilterConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenFilter, error) {
return NewSpanishNormalizeFilter(), nil
}

// init registers NormalizerFilterConstructor with the registry under
// NormalizeName.
func init() {
registry.RegisterTokenFilter(NormalizeName, NormalizerFilterConstructor)
}
112 changes: 112 additions & 0 deletions analysis/lang/es/spanish_normalize_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,112 @@
// Copyright (c) 2017 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package es

import (
"reflect"
"testing"

"github.com/blevesearch/bleve/v2/analysis"
)

// TestSpanishNormalizeFilter runs the Spanish normalize filter over
// single-token streams and checks that accented lowercase vowels are
// replaced by their plain form, that other characters (including leading
// capitals) are untouched, and that an empty term stays empty.
func TestSpanishNormalizeFilter(t *testing.T) {
	// single wraps one term in a one-token stream.
	single := func(term []byte) analysis.TokenStream {
		return analysis.TokenStream{
			&analysis.Token{Term: term},
		}
	}

	cases := []struct {
		in   analysis.TokenStream
		want analysis.TokenStream
	}{
		{in: single([]byte("Guía")), want: single([]byte("Guia"))},
		{in: single([]byte("Belcebú")), want: single([]byte("Belcebu"))},
		{in: single([]byte("Limón")), want: single([]byte("Limon"))},
		{in: single([]byte("agüero")), want: single([]byte("aguero"))},
		{in: single([]byte("laúd")), want: single([]byte("laud"))},
		// empty
		{in: single([]byte("")), want: single([]byte(""))},
	}

	filter := NewSpanishNormalizeFilter()
	for i := range cases {
		want := cases[i].want
		got := filter.Filter(cases[i].in)
		if reflect.DeepEqual(got, want) {
			continue
		}
		t.Errorf("expected %#v, got %#v", want, got)
		t.Errorf("expected %s(% x), got %s(% x)", want[0].Term, want[0].Term, got[0].Term, got[0].Term)
	}
}
4 changes: 4 additions & 0 deletions index_impl.go
Original file line number Diff line number Diff line change
Expand Up @@ -717,7 +717,11 @@ func LoadAndHighlightFields(hit *search.DocumentMatch, req *SearchRequest,
if len(req.Fields) > 0 || highlighter != nil {
doc, err := r.Document(hit.ID)
if err == nil && doc != nil {
<<<<<<< HEAD

Check failure on line 720 in index_impl.go

View workflow job for this annotation

GitHub Actions / test (1.18.x, ubuntu-latest)

syntax error: unexpected <<, expecting }

Check failure on line 720 in index_impl.go

View workflow job for this annotation

GitHub Actions / test (1.19.x, ubuntu-latest)

syntax error: unexpected <<, expecting }

Check failure on line 720 in index_impl.go

View workflow job for this annotation

GitHub Actions / test (1.19.x, macos-latest)

syntax error: unexpected <<, expecting }

Check failure on line 720 in index_impl.go

View workflow job for this annotation

GitHub Actions / test (1.20.x, ubuntu-latest)

syntax error: unexpected <<, expected }

Check failure on line 720 in index_impl.go

View workflow job for this annotation

GitHub Actions / test (1.20.x, macos-latest)

syntax error: unexpected <<, expected }
if len(req.Fields) > 0 && hit.Fields == nil {
=======

Check failure on line 722 in index_impl.go

View workflow job for this annotation

GitHub Actions / test (1.18.x, ubuntu-latest)

syntax error: unexpected ==, expecting }

Check failure on line 722 in index_impl.go

View workflow job for this annotation

GitHub Actions / test (1.19.x, ubuntu-latest)

syntax error: unexpected ==, expecting }

Check failure on line 722 in index_impl.go

View workflow job for this annotation

GitHub Actions / test (1.19.x, macos-latest)

syntax error: unexpected ==, expecting }

Check failure on line 722 in index_impl.go

View workflow job for this annotation

GitHub Actions / test (1.20.x, ubuntu-latest)

syntax error: unexpected ==, expected }

Check failure on line 722 in index_impl.go

View workflow job for this annotation

GitHub Actions / test (1.20.x, macos-latest)

syntax error: unexpected ==, expected }
if len(req.Fields) > 0 {
>>>>>>> origin/master

Check failure on line 724 in index_impl.go

View workflow job for this annotation

GitHub Actions / test (1.18.x, ubuntu-latest)

syntax error: unexpected >>, expecting }

Check failure on line 724 in index_impl.go

View workflow job for this annotation

GitHub Actions / test (1.19.x, ubuntu-latest)

syntax error: unexpected >>, expecting }

Check failure on line 724 in index_impl.go

View workflow job for this annotation

GitHub Actions / test (1.19.x, macos-latest)

syntax error: unexpected >>, expecting }

Check failure on line 724 in index_impl.go

View workflow job for this annotation

GitHub Actions / test (1.20.x, ubuntu-latest)

syntax error: unexpected >>, expected }

Check failure on line 724 in index_impl.go

View workflow job for this annotation

GitHub Actions / test (1.20.x, macos-latest)

syntax error: unexpected >>, expected }
totalStoredFieldsBytes = doc.StoredFieldsBytes()
fieldsToLoad := deDuplicate(req.Fields)
for _, f := range fieldsToLoad {
Expand Down

0 comments on commit a0cb65a

Please sign in to comment.