|  | 
|  | 1 | +--- | 
|  | 2 | +layout: default | 
|  | 3 | +title: Keyword marker | 
|  | 4 | +parent: Token filters | 
|  | 5 | +nav_order: 200 | 
|  | 6 | +--- | 
|  | 7 | + | 
|  | 8 | +# Keyword marker token filter | 
|  | 9 | + | 
|  | 10 | +The `keyword_marker` token filter is used to prevent certain tokens from being altered by stemmers or other filters. The `keyword_marker` token filter does this by marking the specified tokens as `keywords`, which prevents any stemming or other processing. This ensures that specific words remain in their original form.  | 
|  | 11 | + | 
|  | 12 | +## Parameters | 
|  | 13 | + | 
|  | 14 | +The `keyword_marker` token filter can be configured with the following parameters. | 
|  | 15 | + | 
|  | 16 | +Parameter | Required/Optional | Data type | Description | 
|  | 17 | +:--- | :--- | :--- | :---  | 
|  | 18 | +`ignore_case` | Optional | Boolean | Whether to ignore the letter case when matching keywords. Default is `false`. | 
|  | 19 | +`keywords` | Required if neither `keywords_path` nor `keywords_pattern` is set | List of strings | The list of tokens to mark as keywords.  | 
|  | 20 | +`keywords_path` | Required if neither `keywords` nor `keywords_pattern` is set | String | The path (relative to the `config` directory or absolute) to the list of keywords. | 
|  | 21 | +`keywords_pattern` | Required if neither `keywords` nor `keywords_path` is set | String | A [regular expression](https://docs.oracle.com/javase/8/docs/api/java/util/regex/Pattern.html) used for matching tokens to be marked as keywords. | 
|  | 22 | +  | 
|  | 23 | + | 
|  | 24 | +## Example | 
|  | 25 | + | 
|  | 26 | +The following example request creates a new index named `my_index` and configures an analyzer with a `keyword_marker` filter. The filter marks the word `example` as a keyword: | 
|  | 27 | + | 
|  | 28 | +```json | 
|  | 29 | +PUT /my_index | 
|  | 30 | +{ | 
|  | 31 | +  "settings": { | 
|  | 32 | +    "analysis": { | 
|  | 33 | +      "analyzer": { | 
|  | 34 | +        "custom_analyzer": { | 
|  | 35 | +          "type": "custom", | 
|  | 36 | +          "tokenizer": "standard", | 
|  | 37 | +          "filter": ["lowercase", "keyword_marker_filter", "stemmer"] | 
|  | 38 | +        } | 
|  | 39 | +      }, | 
|  | 40 | +      "filter": { | 
|  | 41 | +        "keyword_marker_filter": { | 
|  | 42 | +          "type": "keyword_marker", | 
|  | 43 | +          "keywords": ["example"] | 
|  | 44 | +        } | 
|  | 45 | +      } | 
|  | 46 | +    } | 
|  | 47 | +  } | 
|  | 48 | +} | 
|  | 49 | +``` | 
|  | 50 | +{% include copy-curl.html %} | 
|  | 51 | + | 
|  | 52 | +## Generated tokens | 
|  | 53 | + | 
|  | 54 | +Use the following request to examine the tokens generated using the analyzer: | 
|  | 55 | + | 
|  | 56 | +```json | 
|  | 57 | +GET /my_index/_analyze | 
|  | 58 | +{ | 
|  | 59 | +  "analyzer": "custom_analyzer", | 
|  | 60 | +  "text": "Favorite example" | 
|  | 61 | +} | 
|  | 62 | +``` | 
|  | 63 | +{% include copy-curl.html %} | 
|  | 64 | + | 
|  | 65 | +The response contains the generated tokens. Note that while the word `favorite` was stemmed, the word `example` was not stemmed because it was marked as a keyword: | 
|  | 66 | + | 
|  | 67 | +```json | 
|  | 68 | +{ | 
|  | 69 | +  "tokens": [ | 
|  | 70 | +    { | 
|  | 71 | +      "token": "favorit", | 
|  | 72 | +      "start_offset": 0, | 
|  | 73 | +      "end_offset": 8, | 
|  | 74 | +      "type": "<ALPHANUM>", | 
|  | 75 | +      "position": 0 | 
|  | 76 | +    }, | 
|  | 77 | +    { | 
|  | 78 | +      "token": "example", | 
|  | 79 | +      "start_offset": 9, | 
|  | 80 | +      "end_offset": 16, | 
|  | 81 | +      "type": "<ALPHANUM>", | 
|  | 82 | +      "position": 1 | 
|  | 83 | +    } | 
|  | 84 | +  ] | 
|  | 85 | +} | 
|  | 86 | +``` | 
|  | 87 | + | 
|  | 88 | +You can further examine the impact of the `keyword_marker` token filter by adding the following parameters to the `_analyze` request: | 
|  | 89 | + | 
|  | 90 | +```json | 
|  | 91 | +GET /my_index/_analyze | 
|  | 92 | +{ | 
|  | 93 | +  "analyzer": "custom_analyzer", | 
|  | 94 | +  "text": "This is an OpenSearch example demonstrating keyword marker.", | 
|  | 95 | +  "explain": true, | 
|  | 96 | +  "attributes": "keyword" | 
|  | 97 | +} | 
|  | 98 | +``` | 
|  | 99 | +{% include copy-curl.html %} | 
|  | 100 | + | 
|  | 101 | +This will produce additional details in the response similar to the following: | 
|  | 102 | + | 
|  | 103 | +```json | 
|  | 104 | +{ | 
|  | 105 | +    "name": "porter_stem", | 
|  | 106 | +    "tokens": [ | 
|  | 107 | +      ... | 
|  | 108 | +      { | 
|  | 109 | +        "token": "example", | 
|  | 110 | +        "start_offset": 22, | 
|  | 111 | +        "end_offset": 29, | 
|  | 112 | +        "type": "<ALPHANUM>", | 
|  | 113 | +        "position": 4, | 
|  | 114 | +        "keyword": true | 
|  | 115 | +      }, | 
|  | 116 | +      { | 
|  | 117 | +        "token": "demonstr", | 
|  | 118 | +        "start_offset": 30, | 
|  | 119 | +        "end_offset": 43, | 
|  | 120 | +        "type": "<ALPHANUM>", | 
|  | 121 | +        "position": 5, | 
|  | 122 | +        "keyword": false | 
|  | 123 | +      }, | 
|  | 124 | +      ... | 
|  | 125 | +    ] | 
|  | 126 | +} | 
|  | 127 | +``` | 
0 commit comments