---
layout: default
title: Elision
parent: Token filters
nav_order: 130
---

# Elision token filter

The `elision` token filter removes elided characters from words in certain languages. Elision is common in languages such as French, in which a word is contracted and combined with the following word, typically by omitting a vowel and replacing it with an apostrophe.

The `elision` token filter is already preconfigured in the following [language analyzers]({{site.url}}{{site.baseurl}}/analyzers/language-analyzers/): `catalan`, `french`, `irish`, and `italian`.
{: .note}
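
Because these analyzers already include elision handling, you can observe the behavior without creating an index. The following request is a minimal sketch using the built-in `french` analyzer. Note that this analyzer also applies stemming and stop word removal, so the resulting tokens may differ from the plain dictionary words:

```json
GET /_analyze
{
  "analyzer": "french",
  "text": "l'objectif"
}
```
{% include copy-curl.html %}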

## Parameters

The custom `elision` token filter can be configured with the following parameters.

Parameter | Required/Optional | Data type | Description
:--- | :--- | :--- | :---
`articles` | Required if `articles_path` is not configured | Array of strings | Defines which articles or short words should be removed when they appear as part of an elision.
`articles_path` | Required if `articles` is not configured | String | Specifies the path to a custom list of articles that should be removed during the analysis process.
`articles_case` | Optional | Boolean | Specifies whether the filter is case sensitive when matching elisions. Default is `false`.
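
To experiment with these parameters before creating an index, you can define the filter inline in an `_analyze` request. The following request is a minimal sketch that removes the articles `l` and `d` (this short article list is illustrative, not a complete French set):

```json
GET /_analyze
{
  "tokenizer": "standard",
  "filter": [
    {
      "type": "elision",
      "articles": ["l", "d"]
    }
  ],
  "text": "l'hiver d'abord"
}
```
{% include copy-curl.html %}

The filter strips the leading `l'` and `d'`, producing the tokens `hiver` and `abord`.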

## Example

The default set of French elisions is `l'`, `m'`, `t'`, `qu'`, `n'`, `s'`, `j'`, `d'`, `c'`, `jusqu'`, `quoiqu'`, `lorsqu'`, and `puisqu'`. You can customize this set by defining your own elision filter. The following example request creates a new index named `french_texts` and configures an analyzer with a custom `french_elision` filter:

```json
PUT /french_texts
{
  "settings": {
    "analysis": {
      "filter": {
        "french_elision": {
          "type": "elision",
          "articles": [ "l", "t", "m", "d", "n", "s", "j" ]
        }
      },
      "analyzer": {
        "french_analyzer": {
          "type": "custom",
          "tokenizer": "standard",
          "filter": ["lowercase", "french_elision"]
        }
      }
    }
  },
  "mappings": {
    "properties": {
      "text": {
        "type": "text",
        "analyzer": "french_analyzer"
      }
    }
  }
}
```
{% include copy-curl.html %}
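
If the article list is long or shared across indexes, you can store it in a file and reference the file using `articles_path` instead of listing the articles inline. The following request is a minimal sketch assuming a hypothetical file `analysis/french_articles.txt`, containing one article per line without apostrophes, located in the OpenSearch config directory:

```json
PUT /french_texts_from_file
{
  "settings": {
    "analysis": {
      "filter": {
        "french_elision_from_file": {
          "type": "elision",
          "articles_path": "analysis/french_articles.txt"
        }
      },
      "analyzer": {
        "french_analyzer_from_file": {
          "type": "custom",
          "tokenizer": "standard",
          "filter": ["lowercase", "french_elision_from_file"]
        }
      }
    }
  }
}
```
{% include copy-curl.html %}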

## Generated tokens

Use the following request to examine the tokens generated using the analyzer:

```json
POST /french_texts/_analyze
{
  "analyzer": "french_analyzer",
  "text": "L'étudiant aime l'école et le travail."
}
```
{% include copy-curl.html %}

The response contains the generated tokens:

```json
{
  "tokens": [
    {
      "token": "étudiant",
      "start_offset": 0,
      "end_offset": 10,
      "type": "<ALPHANUM>",
      "position": 0
    },
    {
      "token": "aime",
      "start_offset": 11,
      "end_offset": 15,
      "type": "<ALPHANUM>",
      "position": 1
    },
    {
      "token": "école",
      "start_offset": 16,
      "end_offset": 23,
      "type": "<ALPHANUM>",
      "position": 2
    },
    {
      "token": "et",
      "start_offset": 24,
      "end_offset": 26,
      "type": "<ALPHANUM>",
      "position": 3
    },
    {
      "token": "le",
      "start_offset": 27,
      "end_offset": 29,
      "type": "<ALPHANUM>",
      "position": 4
    },
    {
      "token": "travail",
      "start_offset": 30,
      "end_offset": 37,
      "type": "<ALPHANUM>",
      "position": 5
    }
  ]
}
```
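
Because the elided articles are removed at both index time and search time, a query containing the bare word matches the original elided text. As a quick check, you could index a document and then search for it (the document and query below are illustrative):

```json
PUT /french_texts/_doc/1
{
  "text": "L'étudiant aime l'école."
}
```
{% include copy-curl.html %}

```json
GET /french_texts/_search
{
  "query": {
    "match": {
      "text": "étudiant"
    }
  }
}
```
{% include copy-curl.html %}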