Skip to content

Commit

Permalink
fix(synonyms): use multiplexers to prevent viral synonyms, clean up token filter ordering
Browse files Browse the repository at this point in the history
  • Loading branch information
missinglink authored and orangejulius committed Jul 13, 2020
1 parent 1ac811f commit 1f4370f
Show file tree
Hide file tree
Showing 3 changed files with 166 additions and 75 deletions.
68 changes: 42 additions & 26 deletions settings.js
Original file line number Diff line number Diff line change
Expand Up @@ -33,11 +33,9 @@ function generate(){
"char_filter" : ["punctuation", "nfkc_normalizer"],
"filter": [
"lowercase",
"icu_folding",
"trim",
"synonyms/custom_admin",
"synonyms/personal_titles",
"synonyms/place_names",
"admin_synonyms_multiplexer",
"icu_folding",
"word_delimiter",
"unique_only_same_position",
"notnull",
Expand All @@ -50,14 +48,9 @@ function generate(){
"char_filter" : ["punctuation", "nfkc_normalizer"],
"filter": [
"lowercase",
"icu_folding",
"trim",
"synonyms/custom_name",
"synonyms/personal_titles",
"synonyms/place_names",
"synonyms/streets",
"synonyms/directionals",
"synonyms/punctuation",
"name_synonyms_multiplexer",
"icu_folding",
"remove_ordinals",
"removeAllZeroNumericPrefix",
"peliasOneEdgeGramFilter",
Expand All @@ -71,9 +64,9 @@ function generate(){
"tokenizer": "peliasTokenizer",
"char_filter": ["punctuation", "nfkc_normalizer"],
"filter": [
"icu_folding",
"lowercase",
"trim",
"icu_folding",
"remove_ordinals",
"removeAllZeroNumericPrefix",
"unique_only_same_position",
Expand All @@ -88,12 +81,7 @@ function generate(){
"lowercase",
"trim",
"remove_duplicate_spaces",
"synonyms/punctuation",
"synonyms/custom_name",
"synonyms/personal_titles",
"synonyms/place_names",
"synonyms/streets",
"synonyms/directionals",
"name_synonyms_multiplexer",
"icu_folding",
"remove_ordinals",
"unique_only_same_position",
Expand All @@ -104,23 +92,23 @@ function generate(){
"peliasZip": {
"type": "custom",
"tokenizer":"keyword",
"char_filter" : ["alphanumeric"],
"char_filter": ["alphanumeric", "nfkc_normalizer"],
"filter": [
"lowercase",
"icu_folding",
"trim",
"icu_folding",
"unique_only_same_position",
"notnull"
]
},
"peliasUnit": {
"type": "custom",
"tokenizer":"keyword",
"char_filter" : ["alphanumeric"],
"char_filter": ["alphanumeric", "nfkc_normalizer"],
"filter": [
"lowercase",
"icu_folding",
"trim",
"icu_folding",
"unique_only_same_position",
"notnull"
]
Expand All @@ -138,10 +126,7 @@ function generate(){
"lowercase",
"trim",
"remove_duplicate_spaces",
"synonyms/custom_street",
"synonyms/personal_titles",
"synonyms/streets",
"synonyms/directionals",
"street_synonyms_multiplexer",
"icu_folding",
"remove_ordinals",
"trim",
Expand All @@ -152,6 +137,37 @@ function generate(){
}
},
"filter" : {
"street_synonyms_multiplexer": {
"type": "multiplexer",
"preserve_original": false,
"filters": [
"synonyms/custom_street",
"synonyms/personal_titles",
"synonyms/streets",
"synonyms/directionals"
]
},
"name_synonyms_multiplexer": {
"type": "multiplexer",
"preserve_original": false,
"filters": [
"synonyms/custom_name",
"synonyms/personal_titles",
"synonyms/place_names",
"synonyms/streets",
"synonyms/directionals",
"synonyms/punctuation"
]
},
"admin_synonyms_multiplexer": {
"type": "multiplexer",
"preserve_original": false,
"filters": [
"synonyms/custom_admin",
"synonyms/personal_titles",
"synonyms/place_names"
]
},
"notnull" :{
"type" : "length",
"min" : 1
Expand Down
70 changes: 44 additions & 26 deletions test/fixtures/expected.json
Original file line number Diff line number Diff line change
Expand Up @@ -29,11 +29,9 @@
],
"filter": [
"lowercase",
"icu_folding",
"trim",
"synonyms/custom_admin",
"synonyms/personal_titles",
"synonyms/place_names",
"admin_synonyms_multiplexer",
"icu_folding",
"word_delimiter",
"unique_only_same_position",
"notnull",
Expand All @@ -49,14 +47,9 @@
],
"filter": [
"lowercase",
"icu_folding",
"trim",
"synonyms/custom_name",
"synonyms/personal_titles",
"synonyms/place_names",
"synonyms/streets",
"synonyms/directionals",
"synonyms/punctuation",
"name_synonyms_multiplexer",
"icu_folding",
"remove_ordinals",
"removeAllZeroNumericPrefix",
"peliasOneEdgeGramFilter",
Expand All @@ -73,9 +66,9 @@
"nfkc_normalizer"
],
"filter": [
"icu_folding",
"lowercase",
"trim",
"icu_folding",
"remove_ordinals",
"removeAllZeroNumericPrefix",
"unique_only_same_position",
Expand All @@ -93,12 +86,7 @@
"lowercase",
"trim",
"remove_duplicate_spaces",
"synonyms/punctuation",
"synonyms/custom_name",
"synonyms/personal_titles",
"synonyms/place_names",
"synonyms/streets",
"synonyms/directionals",
"name_synonyms_multiplexer",
"icu_folding",
"remove_ordinals",
"unique_only_same_position",
Expand All @@ -110,12 +98,13 @@
"type": "custom",
"tokenizer": "keyword",
"char_filter": [
"alphanumeric"
"alphanumeric",
"nfkc_normalizer"
],
"filter": [
"lowercase",
"icu_folding",
"trim",
"icu_folding",
"unique_only_same_position",
"notnull"
]
Expand All @@ -124,12 +113,13 @@
"type": "custom",
"tokenizer": "keyword",
"char_filter": [
"alphanumeric"
"alphanumeric",
"nfkc_normalizer"
],
"filter": [
"lowercase",
"icu_folding",
"trim",
"icu_folding",
"unique_only_same_position",
"notnull"
]
Expand All @@ -152,10 +142,7 @@
"lowercase",
"trim",
"remove_duplicate_spaces",
"synonyms/custom_street",
"synonyms/personal_titles",
"synonyms/streets",
"synonyms/directionals",
"street_synonyms_multiplexer",
"icu_folding",
"remove_ordinals",
"trim",
Expand All @@ -166,6 +153,37 @@
}
},
"filter": {
"street_synonyms_multiplexer": {
"type": "multiplexer",
"preserve_original": false,
"filters": [
"synonyms/custom_street",
"synonyms/personal_titles",
"synonyms/streets",
"synonyms/directionals"
]
},
"name_synonyms_multiplexer": {
"type": "multiplexer",
"preserve_original": false,
"filters": [
"synonyms/custom_name",
"synonyms/personal_titles",
"synonyms/place_names",
"synonyms/streets",
"synonyms/directionals",
"synonyms/punctuation"
]
},
"admin_synonyms_multiplexer": {
"type": "multiplexer",
"preserve_original": false,
"filters": [
"synonyms/custom_admin",
"synonyms/personal_titles",
"synonyms/place_names"
]
},
"notnull": {
"type": "length",
"min": 1
Expand Down
Loading

0 comments on commit 1f4370f

Please sign in to comment.